Remove old code for PyTorch, Accelerator and tokenizers (#37234)

* Remove unneeded library version checks
  Signed-off-by: cyy <cyyever@outlook.com>
* Remove PyTorch condition
  Signed-off-by: cyy <cyyever@outlook.com>
* Remove PyTorch condition
  Signed-off-by: cyy <cyyever@outlook.com>
* Fix ROCm get_device_capability
  Signed-off-by: cyy <cyyever@outlook.com>
* Revert "Fix ROCm get_device_capability"
  This reverts commit 0e756434bd7e74ffd73de5500476072b096570a6.
* Remove unnecessary check
  Signed-off-by: cyy <cyyever@outlook.com>
* Revert changes
  Signed-off-by: cyy <cyyever@outlook.com>
---------
Signed-off-by: cyy <cyyever@outlook.com>
2025-04-11 02:54:21 +08:00
parent 7ff896c0f2
commit 371c44d0ef
42 changed files with 53 additions and 178 deletions
--- a/tests/fsdp/test_fsdp.py
+++ b/tests/fsdp/test_fsdp.py
@@ -323,7 +323,6 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):

    @require_torch_multi_accelerator
    @slow
-    @require_fsdp
    @require_fsdp_v2_version
    @require_accelerate_fsdp2
    def test_accelerate_fsdp2_integration(self):
--- a/tests/models/bert/test_modeling_bert.py
+++ b/tests/models/bert/test_modeling_bert.py
@@ -510,7 +510,6 @@ class BertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/bert_generation/test_modeling_bert_generation.py
+++ b/tests/models/bert_generation/test_modeling_bert_generation.py
@@ -273,7 +273,6 @@ class BertGenerationEncoderTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
        self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/big_bird/test_modeling_big_bird.py
+++ b/tests/models/big_bird/test_modeling_big_bird.py
@@ -506,7 +506,6 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/chinese_clip/test_modeling_chinese_clip.py
+++ b/tests/models/chinese_clip/test_modeling_chinese_clip.py
@@ -354,7 +354,6 @@ class ChineseCLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/data2vec/test_modeling_data2vec_text.py
+++ b/tests/models/data2vec/test_modeling_data2vec_text.py
@@ -409,7 +409,6 @@ class Data2VecTextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/ernie/test_modeling_ernie.py
+++ b/tests/models/ernie/test_modeling_ernie.py
@@ -492,7 +492,6 @@ class ErnieModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/gpt_neox/test_modeling_gpt_neox.py
+++ b/tests/models/gpt_neox/test_modeling_gpt_neox.py
@@ -306,7 +306,6 @@ class GPTNeoXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

        input_mask = None
--- a/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py
+++ b/tests/models/gpt_neox_japanese/test_modeling_gpt_neox_japanese.py
@@ -223,7 +223,6 @@ class GPTNeoXModelJapaneseTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
        self.model_tester.create_and_check_model_as_decoder(config, input_ids, input_mask)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs_for_decoder()

        input_mask = None
--- a/tests/models/hubert/test_modeling_hubert.py
+++ b/tests/models/hubert/test_modeling_hubert.py
@@ -23,7 +23,6 @@ import pytest

 from transformers import HubertConfig, is_torch_available
 from transformers.testing_utils import require_soundfile, require_torch, slow, torch_device
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -48,8 +47,7 @@ if is_torch_available():
    )
    from transformers.models.hubert.modeling_hubert import _compute_mask_indices

-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace


 class HubertModelTester:
@@ -438,8 +436,8 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        # TODO: fix it
        self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/models/mt5/test_modeling_mt5.py
+++ b/tests/models/mt5/test_modeling_mt5.py
@@ -27,7 +27,7 @@ from transformers.testing_utils import (
    slow,
    torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,9 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
 if is_torch_available():
    import torch
    import torch.nn.functional as F
@@ -598,8 +595,8 @@ class MT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        return False

    def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/models/rembert/test_modeling_rembert.py
+++ b/tests/models/rembert/test_modeling_rembert.py
@@ -416,7 +416,6 @@ class RemBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/roberta/test_modeling_roberta.py
+++ b/tests/models/roberta/test_modeling_roberta.py
@@ -417,7 +417,6 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/roberta_prelayernorm/test_modeling_roberta_prelayernorm.py
+++ b/tests/models/roberta_prelayernorm/test_modeling_roberta_prelayernorm.py
@@ -421,7 +421,6 @@ class RobertaPreLayerNormModelTest(ModelTesterMixin, GenerationTesterMixin, Pipe

    # Copied from tests.models.roberta.test_modeling_roberta.RobertaModelTest.test_model_as_decoder_with_default_input_mask
    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/roc_bert/test_modeling_roc_bert.py
+++ b/tests/models/roc_bert/test_modeling_roc_bert.py
@@ -664,7 +664,6 @@ class RoCBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/roformer/test_modeling_roformer.py
+++ b/tests/models/roformer/test_modeling_roformer.py
@@ -433,7 +433,6 @@ class RoFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/t5/test_modeling_t5.py
+++ b/tests/models/t5/test_modeling_t5.py
@@ -32,7 +32,8 @@ from transformers.testing_utils import (
    slow,
    torch_device,
 )
-from transformers.utils import cached_property, is_torch_fx_available
+from transformers.utils import cached_property
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -40,10 +41,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
-
 if is_torch_available():
    import torch
    import torch.nn.functional as F
@@ -603,8 +600,8 @@ class T5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        return False

    def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch.fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch.fx is not compatible with this model")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/models/umt5/test_modeling_umt5.py
+++ b/tests/models/umt5/test_modeling_umt5.py
@@ -27,7 +27,7 @@ from transformers.testing_utils import (
    slow,
    torch_device,
 )
-from transformers.utils import is_torch_fx_available
+from transformers.utils.fx import symbolic_trace

 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -35,10 +35,6 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_ten
 from ...test_pipeline_mixin import PipelineTesterMixin


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
-
-
 if is_torch_available():
    import torch
    import torch.nn.functional as F
@@ -300,8 +296,8 @@ class UMT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        return False

    def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx is not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/models/wav2vec2/test_modeling_wav2vec2.py
+++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py
@@ -42,7 +42,6 @@ from transformers.testing_utils import (
    slow,
    torch_device,
 )
-from transformers.utils import is_torch_fx_available

 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -90,8 +89,7 @@ if is_pyctcdecode_available():
    from transformers.models.wav2vec2_with_lm import processing_wav2vec2_with_lm


-if is_torch_fx_available():
-    from transformers.utils.fx import symbolic_trace
+from transformers.utils.fx import symbolic_trace


 def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
@@ -716,8 +714,8 @@ class Wav2Vec2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
        # TODO: fix it
        self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")

-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(reason="torch fx not available or not compatible with this model")
+        if not self.fx_compatible:
+            self.skipTest(reason="torch fx is not compatible with this model")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py
+++ b/tests/models/xlm_roberta_xl/test_modeling_xlm_roberta_xl.py
@@ -425,7 +425,6 @@ class XLMRobertaXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTes
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/models/xmod/test_modeling_xmod.py
+++ b/tests/models/xmod/test_modeling_xmod.py
@@ -420,7 +420,6 @@ class XmodModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
-        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -101,7 +101,6 @@ from transformers.utils import (
    is_accelerate_available,
    is_torch_bf16_available_on_device,
    is_torch_fp16_available_on_device,
-    is_torch_fx_available,
    is_torch_sdpa_available,
 )
 from transformers.utils.generic import ContextManagers
@@ -125,8 +124,8 @@ if is_torch_available():
    from transformers.modeling_utils import load_state_dict, no_init_weights
    from transformers.pytorch_utils import id_tensor_storage

-if is_torch_fx_available():
-    from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace
+from transformers.utils.fx import _FX_SUPPORTED_MODELS_WITH_KV_CACHE, symbolic_trace
+

 if is_deepspeed_available():
    import deepspeed
@@ -1190,10 +1189,8 @@ class ModelTesterMixin:
        self._create_and_check_torch_fx_tracing(config, inputs_dict, output_loss=True)

    def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
-        if not is_torch_fx_available() or not self.fx_compatible:
-            self.skipTest(
-                f"Either torch.fx is not available, or the model type {config.model_type} is not compatible with torch.fx"
-            )
+        if not self.fx_compatible:
+            self.skipTest(f"The model type {config.model_type} is not compatible with torch.fx")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.return_dict = False
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -99,7 +99,6 @@ from transformers.testing_utils import (
    require_torch_tensorrt_fx,
    require_torch_tf32,
    require_torch_up_to_2_accelerators,
-    require_torchdynamo,
    require_vision,
    require_wandb,
    run_first,
@@ -3994,10 +3993,9 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):

    @require_non_xpu
    @require_torch_non_multi_gpu
-    @require_torchdynamo
    @require_torch_tensorrt_fx
    def test_torchdynamo_full_eval(self):
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

        # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
        n_gpus = get_gpu_count()
@@ -4017,30 +4015,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
            del trainer

            # 2. TorchDynamo eager
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="eager", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="eager", output_dir=tmp_dir
+            )
            metrics = trainer.evaluate()
            self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
            del trainer
            torchdynamo.reset()

            # 3. TorchDynamo nvfuser
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="nvfuser", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="nvfuser", output_dir=tmp_dir
+            )
            metrics = trainer.evaluate()
            self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
            torchdynamo.reset()

            # 4. TorchDynamo fx2trt
-            trainer = get_regression_trainer(a=a, b=b, eval_len=eval_len, torchdynamo="fx2trt", output_dir=tmp_dir)
+            trainer = get_regression_trainer(
+                a=a, b=b, eval_len=eval_len, torch_compile_backend="fx2trt", output_dir=tmp_dir
+            )
            metrics = trainer.evaluate()
            self.assertAlmostEqual(metrics["eval_loss"], original_eval_loss)
            torchdynamo.reset()

-    @unittest.skip(reason="torch 2.0.0 gives `ModuleNotFoundError: No module named 'torchdynamo'`.")
    @require_torch_non_multi_gpu
-    @require_torchdynamo
+    @require_torch_gpu
    def test_torchdynamo_memory(self):
        # torchdynamo at the moment doesn't support DP/DDP, therefore require a single gpu
-        import torchdynamo
+        from torch import _dynamo as torchdynamo

        class CustomTrainer(Trainer):
            def compute_loss(self, model, inputs, return_outputs=False):
@@ -4085,7 +4088,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
        with tempfile.TemporaryDirectory() as tmp_dir:
            a = torch.ones(1024, 1024, device="cuda", requires_grad=True)
            a.grad = None
-            args = TrainingArguments(output_dir=tmp_dir, torchdynamo="nvfuser")
+            args = TrainingArguments(output_dir=tmp_dir, torch_compile_backend="nvfuser")
            trainer = CustomTrainer(model=mod, args=args)
            # warmup
            for _ in range(10):
--- a/tests/trainer/test_trainer_fsdp.py
+++ b/tests/trainer/test_trainer_fsdp.py
@@ -21,7 +21,6 @@ from transformers.testing_utils import (
    get_torch_dist_unique_port,
    require_accelerate,
    require_fp8,
-    require_fsdp,
    require_torch_multi_accelerator,
    run_first,
    torch_device,
@@ -68,7 +67,6 @@ if is_torch_available():
 class TestFSDPTrainer(TestCasePlus):
    @require_torch_multi_accelerator
    @require_accelerate
-    @require_fsdp
    @run_first
    def test_trainer(self):
        output_dir = self.get_auto_remove_tmp_dir()
@@ -95,7 +93,6 @@ class TestFSDPTrainer(TestCasePlus):
 class TestFSDPTrainerFP8(TestCasePlus):
    @require_torch_multi_accelerator
    @require_accelerate
-    @require_fsdp
    @require_fp8
    @run_first
    def test_trainer(self):
@@ -125,7 +122,6 @@ class TestFSDPTrainerFP8(TestCasePlus):
 class TestFSDPTrainerWrap(TestCasePlus):
    @require_torch_multi_accelerator
    @require_accelerate
-    @require_fsdp
    @run_first
    def test_trainer(self):
        output_dir = self.get_auto_remove_tmp_dir()
--- a/tests/utils/test_modeling_utils.py
+++ b/tests/utils/test_modeling_utils.py
@@ -81,7 +81,6 @@ from transformers.utils.import_utils import (
    is_tf_available,
    is_torch_npu_available,
    is_torch_sdpa_available,
-    is_torchdynamo_available,
 )


@@ -1483,8 +1482,6 @@ class ModelUtilsTest(TestCasePlus):
                    model.warn_if_padding_and_no_attention_mask(input_ids, attention_mask=None)
            self.assertIn("You may ignore this warning if your `pad_token_id`", cl.out)

-        if not is_torchdynamo_available():
-            self.skipTest(reason="torchdynamo is not available")
        with self.subTest("Ensure that the warning code is skipped when compiling with torchdynamo."):
            logger.warning_once.cache_clear()
            from torch._dynamo import config, testing
--- a/tests/utils/test_versions_utils.py
+++ b/tests/utils/test_versions_utils.py
@@ -86,7 +86,7 @@ class DependencyVersionCheckTest(TestCasePlus):

    def test_python(self):
        # matching requirement
-        require_version("python>=3.6.0")
+        require_version("python>=3.9.0")

        # not matching requirements
        for req in ["python>9.9.9", "python<3.0.0"]: