using multi_gpu consistently (#8446)

* s|multiple_gpu|multi_gpu|g; s|multigpu|multi_gpu|g
* doc
2020-11-10 10:23:58 -08:00
parent b93569457f
commit 02bdfc0251
22 changed files with 117 additions and 117 deletions
--- a/examples/seq2seq/test_bash_script.py
+++ b/examples/seq2seq/test_bash_script.py
@@ -13,7 +13,7 @@ from distillation import BartSummarizationDistiller, distill_main
 from finetune import SummarizationModule, main
 from transformers import MarianMTModel
 from transformers.file_utils import cached_path
-from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import load_json


@@ -32,7 +32,7 @@ class TestMbartCc25Enro(TestCasePlus):

    @slow
    @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_model_download(self):
        """This warms up the cache so that we can time the next test without including download time, which varies between machines."""
        MarianMTModel.from_pretrained(MARIAN_MODEL)
@@ -40,7 +40,7 @@ class TestMbartCc25Enro(TestCasePlus):
    # @timeout_decorator.timeout(1200)
    @slow
    @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_train_mbart_cc25_enro_script(self):
        env_vars_to_replace = {
            "$MAX_LEN": 64,
@@ -75,7 +75,7 @@ class TestMbartCc25Enro(TestCasePlus):
            --num_sanity_val_steps 0
            --eval_beams 2
        """.split()
-        # XXX: args.gpus > 1 : handle multigpu in the future
+        # XXX: args.gpus > 1 : handle multi_gpu in the future

        testargs = ["finetune.py"] + bash_script.split() + args
        with patch.object(sys, "argv", testargs):
@@ -129,7 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
    @timeout_decorator.timeout(600)
    @slow
    @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_opus_mt_distill_script(self):
        data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
        env_vars_to_replace = {
@@ -172,7 +172,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
            parser = pl.Trainer.add_argparse_args(parser)
            parser = BartSummarizationDistiller.add_model_specific_args(parser, os.getcwd())
            args = parser.parse_args()
-            # assert args.gpus == gpus THIS BREAKS for multigpu
+            # assert args.gpus == gpus THIS BREAKS for multi_gpu

            model = distill_main(args)

--- a/examples/seq2seq/test_datasets.py
+++ b/examples/seq2seq/test_datasets.py
@@ -11,7 +11,7 @@ from save_len_file import save_len_file
 from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
 from transformers import AutoTokenizer
 from transformers.modeling_bart import shift_tokens_right
-from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me, slow
 from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset


@@ -30,7 +30,7 @@ class TestAll(TestCasePlus):
        ],
    )
    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_seq2seq_dataset_truncation(self, tok_name):
        tokenizer = AutoTokenizer.from_pretrained(tok_name)
        tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -70,7 +70,7 @@ class TestAll(TestCasePlus):
            break  # No need to test every batch

    @parameterized.expand([BART_TINY, BERT_BASE_CASED])
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_legacy_dataset_truncation(self, tok):
        tokenizer = AutoTokenizer.from_pretrained(tok)
        tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -95,7 +95,7 @@ class TestAll(TestCasePlus):
            assert max_len_target > trunc_target  # Truncated
            break  # No need to test every batch

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_pack_dataset(self):
        tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")

@@ -114,7 +114,7 @@ class TestAll(TestCasePlus):
        assert orig_paths == new_paths

    @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq")
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_dynamic_batch_size(self):
        if not FAIRSEQ_AVAILABLE:
            return
@@ -139,7 +139,7 @@ class TestAll(TestCasePlus):
        if failures:
            raise AssertionError(f"too many tokens in {len(failures)} batches")

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_sortish_sampler_reduces_padding(self):
        ds, _, tokenizer = self._get_dataset(max_len=512)
        bs = 2
@@ -179,7 +179,7 @@ class TestAll(TestCasePlus):
        )
        return ds, max_tokens, tokenizer

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_distributed_sortish_sampler_splits_indices_between_procs(self):
        ds, max_tokens, tokenizer = self._get_dataset()
        ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False))
@@ -195,7 +195,7 @@ class TestAll(TestCasePlus):
            PEGASUS_XSUM,
        ],
    )
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_dataset_kwargs(self, tok_name):
        tokenizer = AutoTokenizer.from_pretrained(tok_name)
        if tok_name == MBART_TINY:
--- a/examples/seq2seq/test_fsmt_bleu_score.py
+++ b/examples/seq2seq/test_fsmt_bleu_score.py
@@ -22,7 +22,7 @@ from transformers import FSMTForConditionalGeneration, FSMTTokenizer
 from transformers.testing_utils import (
    get_tests_dir,
    require_torch,
-    require_torch_non_multigpu_but_fix_me,
+    require_torch_non_multi_gpu_but_fix_me,
    slow,
    torch_device,
 )
@@ -54,7 +54,7 @@ class ModelEvalTester(unittest.TestCase):
        ]
    )
    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_bleu_scores(self, pair, min_bleu_score):
        # note: this test is not testing the best performance since it only evals a small batch
        # but it should be enough to detect a regression in the output quality
--- a/examples/seq2seq/test_make_student.py
+++ b/examples/seq2seq/test_make_student.py
@@ -4,7 +4,7 @@ import unittest
 from make_student import create_student_by_copying_alternating_layers
 from transformers import AutoConfig
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me
+from transformers.testing_utils import require_torch, require_torch_non_multi_gpu_but_fix_me


 TINY_BART = "sshleifer/bart-tiny-random"
@@ -17,28 +17,28 @@ class MakeStudentTester(unittest.TestCase):
    def teacher_config(self):
        return AutoConfig.from_pretrained(TINY_BART)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_valid_t5(self):
        student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1)
        self.assertEqual(student.config.num_hidden_layers, 1)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_asymmetric_t5(self):
        student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_same_decoder_small_encoder(self):
        student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None)
        self.assertEqual(student.config.encoder_layers, 1)
        self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_small_enc_small_dec(self):
        student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1)
        self.assertEqual(student.config.encoder_layers, 1)
        self.assertEqual(student.config.decoder_layers, 1)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_raises_assert(self):
        with self.assertRaises(AssertionError):
            create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None)
--- a/examples/seq2seq/test_seq2seq_examples.py
+++ b/examples/seq2seq/test_seq2seq_examples.py
@@ -24,7 +24,7 @@ from transformers.testing_utils import (
    CaptureStdout,
    TestCasePlus,
    require_torch_gpu,
-    require_torch_non_multigpu_but_fix_me,
+    require_torch_non_multi_gpu_but_fix_me,
    slow,
 )
 from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json
@@ -133,7 +133,7 @@ class TestSummarizationDistiller(TestCasePlus):

    @slow
    @require_torch_gpu
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_hub_configs(self):
        """I put require_torch_gpu cause I only want this to run with self-scheduled."""

@@ -151,12 +151,12 @@ class TestSummarizationDistiller(TestCasePlus):
                failures.append(m)
        assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_distill_no_teacher(self):
        updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True)
        self._test_distiller_cli(updates)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_distill_checkpointing_with_teacher(self):
        updates = dict(
            student_encoder_layers=2,
@@ -181,7 +181,7 @@ class TestSummarizationDistiller(TestCasePlus):
        convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
        assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_loss_fn(self):
        model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
        input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
@@ -202,7 +202,7 @@ class TestSummarizationDistiller(TestCasePlus):
            # TODO: understand why this breaks
            self.assertEqual(nll_loss, model_computed_loss)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_distill_mbart(self):
        updates = dict(
            student_encoder_layers=2,
@@ -227,7 +227,7 @@ class TestSummarizationDistiller(TestCasePlus):
        assert len(all_files) > 2
        self.assertEqual(len(transformer_ckpts), 2)

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_distill_t5(self):
        updates = dict(
            student_encoder_layers=1,
@@ -309,21 +309,21 @@ class TestTheRest(TestCasePlus):

    # test one model to quickly (no-@slow) catch simple problems and do an
    # extensive testing of functionality with multiple models as @slow separately
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_run_eval(self):
        self.run_eval_tester(T5_TINY)

    # any extra models should go into the list here - can be slow
    @parameterized.expand([BART_TINY, MBART_TINY])
    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_run_eval_slow(self, model):
        self.run_eval_tester(model)

    # testing with 2 models to validate: 1. translation (t5) 2. summarization (mbart)
    @parameterized.expand([T5_TINY, MBART_TINY])
    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_run_eval_search(self, model):
        input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source"
        output_file_name = input_file_name.parent / "utest_output.txt"
@@ -374,7 +374,7 @@ class TestTheRest(TestCasePlus):
    @parameterized.expand(
        [T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY],
    )
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_finetune(self, model):
        args_d: dict = CHEAP_ARGS.copy()
        task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization"
@@ -426,7 +426,7 @@ class TestTheRest(TestCasePlus):
        assert isinstance(example_batch, dict)
        assert len(example_batch) >= 4

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_finetune_extra_model_args(self):
        args_d: dict = CHEAP_ARGS.copy()

@@ -477,7 +477,7 @@ class TestTheRest(TestCasePlus):
            model = main(args)
        assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute"

-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_finetune_lr_schedulers(self):
        args_d: dict = CHEAP_ARGS.copy()

--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -8,7 +8,7 @@ from transformers.testing_utils import (
    execute_subprocess_async,
    get_gpu_count,
    require_torch_gpu,
-    require_torch_multigpu,
+    require_torch_multi_gpu,
    slow,
 )

@@ -21,8 +21,8 @@ class TestSummarizationDistillerMultiGPU(TestCasePlus):
    def setUpClass(cls):
        return cls

-    @require_torch_multigpu
-    def test_multigpu(self):
+    @require_torch_multi_gpu
+    def test_multi_gpu(self):

        updates = dict(
            no_teacher=True,
--- a/examples/seq2seq/test_tatoeba_conversion.py
+++ b/examples/seq2seq/test_tatoeba_conversion.py
@@ -4,7 +4,7 @@ import unittest

 from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter
 from transformers.file_utils import cached_property
-from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
+from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow


@unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.")
@@ -15,12 +15,12 @@ class TatoebaConversionTester(unittest.TestCase):
        return TatoebaConverter(save_dir=tmp_dir)

    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_resolver(self):
        self.resolver.convert_models(["heb-eng"])

    @slow
-    @require_torch_non_multigpu_but_fix_me
+    @require_torch_non_multi_gpu_but_fix_me
    def test_model_card(self):
        content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True)
        assert mmeta["long_pair"] == "heb-eng"