diff --git a/examples/contrib/run_swag.py b/examples/contrib/run_swag.py index 497ddeca9d..96a16d8df5 100644 --- a/examples/contrib/run_swag.py +++ b/examples/contrib/run_swag.py @@ -622,7 +622,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/distillation/run_squad_w_distillation.py b/examples/distillation/run_squad_w_distillation.py index c32af61bcb..3fabe23ba2 100644 --- a/examples/distillation/run_squad_w_distillation.py +++ b/examples/distillation/run_squad_w_distillation.py @@ -720,7 +720,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/hans/test_hans.py b/examples/hans/test_hans.py index 40c2a1bd3a..a5d4e76149 100644 --- a/examples/hans/test_hans.py +++ b/examples/hans/test_hans.py @@ -520,7 +520,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/mm-imdb/run_mmimdb.py b/examples/mm-imdb/run_mmimdb.py index c7e9f7b47e..ab60b40951 100644 --- a/examples/mm-imdb/run_mmimdb.py +++ b/examples/mm-imdb/run_mmimdb.py @@ -492,7 +492,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/ner/run_ner.py b/examples/ner/run_ner.py index 73b43aee97..c32b3af226 100644 --- a/examples/ner/run_ner.py +++ b/examples/ner/run_ner.py @@ -557,7 +557,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/run_bertology.py b/examples/run_bertology.py index acac56128a..d18b8bc3a2 100644 --- a/examples/run_bertology.py +++ b/examples/run_bertology.py @@ -338,7 +338,7 @@ def main(): # Setup devices and distributed training if args.local_rank == -1 or args.no_cuda: args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) args.device = torch.device("cuda", args.local_rank) diff --git a/examples/run_generation.py b/examples/run_generation.py index 0652567b6b..3f90ee5833 100644 --- a/examples/run_generation.py +++ b/examples/run_generation.py @@ -189,7 +189,7 @@ def main(): args = parser.parse_args() args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() set_seed(args) diff --git a/examples/run_glue.py b/examples/run_glue.py index f3c31b0c06..f5bbde9031 100644 --- a/examples/run_glue.py +++ b/examples/run_glue.py @@ -575,7 +575,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/run_language_modeling.py b/examples/run_language_modeling.py index e0a8929927..c66cc8978f 100644 --- a/examples/run_language_modeling.py +++ b/examples/run_language_modeling.py @@ -663,7 +663,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/run_multiple_choice.py b/examples/run_multiple_choice.py index 72337c110f..c4f90bbad7 100644 --- a/examples/run_multiple_choice.py +++ b/examples/run_multiple_choice.py @@ -535,7 +535,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/run_squad.py b/examples/run_squad.py index f94fb22098..523093e1bb 100644 --- a/examples/run_squad.py +++ b/examples/run_squad.py @@ -725,7 +725,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/examples/run_xnli.py b/examples/run_xnli.py index 0b9e559f1b..9dcae8568f 100644 --- a/examples/run_xnli.py +++ b/examples/run_xnli.py @@ -530,7 +530,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) diff --git a/templates/adding_a_new_example_script/run_xxx.py b/templates/adding_a_new_example_script/run_xxx.py index 6de065ce65..20f4b7360b 100644 --- a/templates/adding_a_new_example_script/run_xxx.py +++ b/templates/adding_a_new_example_script/run_xxx.py @@ -594,7 +594,7 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank)