multi-gpu training should also be set up after apex fp16 initialization (squad)

2019-07-26 15:23:29 +08:00
parent adb3ef6368
commit f0aeb7a814
1 changed files with 4 additions and 2 deletions
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -101,6 +101,10 @@ def train(args, train_dataset, model, tokenizer):
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
@@ -457,8 +461,6 @@ def main():
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab
    model.to(args.device)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    logger.info("Training/evaluation parameters %s", args)