[tests] make cuda-only tests device-agnostic (#35607)

* initial commit * remove unrelated files * further remove * Update test_trainer.py * fix style
2025-01-13 21:48:39 +08:00
parent e6f9b03464
commit 2fa876d2d8
18 changed files with 57 additions and 47 deletions
--- a/tests/models/blip_2/test_modeling_blip_2.py
+++ b/tests/models/blip_2/test_modeling_blip_2.py
@@ -27,6 +27,7 @@ from parameterized import parameterized
 from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
 from transformers.testing_utils import (
    require_torch,
+    require_torch_accelerator,
    require_torch_fp16,
    require_torch_gpu,
    require_torch_multi_accelerator,
@@ -1565,7 +1566,7 @@ class Blip2TextModelWithProjectionTest(ModelTesterMixin, unittest.TestCase):
            self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    def test_model_from_pretrained(self):
        model_name = "Salesforce/blip2-itm-vit-g"
        model = Blip2TextModelWithProjection.from_pretrained(model_name)
@@ -2191,7 +2192,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):

        self.assertTrue(generated_text_expanded == generated_text)

-    @require_torch_gpu
+    @require_torch_accelerator
    def test_inference_itm(self):
        model_name = "Salesforce/blip2-itm-vit-g"
        processor = Blip2Processor.from_pretrained(model_name)
@@ -2210,7 +2211,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
        self.assertTrue(torch.allclose(torch.nn.Softmax()(out_itm[0].cpu()), expected_scores, rtol=1e-3, atol=1e-3))
        self.assertTrue(torch.allclose(out[0].cpu(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
    @require_torch_fp16
    def test_inference_itm_fp16(self):
        model_name = "Salesforce/blip2-itm-vit-g"
@@ -2232,7 +2233,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
        )
        self.assertTrue(torch.allclose(out[0].cpu().float(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
    @require_torch_fp16
    def test_inference_vision_with_projection_fp16(self):
        model_name = "Salesforce/blip2-itm-vit-g"
@@ -2256,7 +2257,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
        ]
        self.assertTrue(np.allclose(out.image_embeds[0][0][:6].tolist(), expected_image_embeds, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
    @require_torch_fp16
    def test_inference_text_with_projection_fp16(self):
        model_name = "Salesforce/blip2-itm-vit-g"
--- a/tests/models/diffllama/test_modeling_diffllama.py
+++ b/tests/models/diffllama/test_modeling_diffllama.py
@@ -676,7 +676,7 @@ class DiffLlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
                )


-@require_torch_gpu
+@require_torch_accelerator
 class DiffLlamaIntegrationTest(unittest.TestCase):
    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
    # Depending on the hardware we get different logits / generations
@@ -689,7 +689,7 @@ class DiffLlamaIntegrationTest(unittest.TestCase):
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    @require_read_token
    def test_compile_static_cache(self):
        # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
--- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
+++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
@@ -23,7 +23,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from transformers.testing_utils import (
    require_bitsandbytes,
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
    require_torch_multi_gpu,
    slow,
    torch_device,
@@ -426,7 +426,7 @@ class FalconMambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest


@require_torch
-@require_torch_gpu
+@require_torch_accelerator
@slow
 class FalconMambaIntegrationTests(unittest.TestCase):
    def setUp(self):
--- a/tests/models/fuyu/test_modeling_fuyu.py
+++ b/tests/models/fuyu/test_modeling_fuyu.py
@@ -22,7 +22,7 @@ import requests
 from parameterized import parameterized

 from transformers import FuyuConfig, is_torch_available, is_vision_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
 from transformers.utils import cached_property

 from ...generation.test_utils import GenerationTesterMixin
@@ -327,7 +327,7 @@ class FuyuModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin


@slow
-@require_torch_gpu
+@require_torch_accelerator
 class FuyuModelIntegrationTest(unittest.TestCase):
    @cached_property
    def default_processor(self):
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -26,7 +26,6 @@ from transformers.testing_utils import (
    require_read_token,
    require_torch,
    require_torch_accelerator,
-    require_torch_gpu,
    slow,
    torch_device,
 )
@@ -541,7 +540,7 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
            config = _reinitialize_config(base_config, {"rope_scaling": {"rope_type": "linear"}})  # missing "factor"


-@require_torch_gpu
+@require_torch_accelerator
 class LlamaIntegrationTest(unittest.TestCase):
    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
    # Depending on the hardware we get different logits / generations
@@ -695,7 +694,7 @@ class LlamaIntegrationTest(unittest.TestCase):
        self.assertEqual(EXPECTED_TEXT_COMPLETION, text)

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    @require_read_token
    def test_compile_static_cache(self):
        # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -424,7 +424,7 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        self.skipTest(reason="Mistral flash attention does not support right padding")


-@require_torch_gpu
+@require_torch_accelerator
 class MistralIntegrationTest(unittest.TestCase):
    # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
    # Depending on the hardware we get different logits / generations
--- a/tests/models/mixtral/test_modeling_mixtral.py
+++ b/tests/models/mixtral/test_modeling_mixtral.py
@@ -22,6 +22,7 @@ from transformers import MixtralConfig, is_torch_available
 from transformers.testing_utils import (
    require_flash_attn,
    require_torch,
+    require_torch_accelerator,
    require_torch_gpu,
    slow,
    torch_device,
@@ -471,7 +472,7 @@ class MixtralIntegrationTest(unittest.TestCase):
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    def test_small_model_logits(self):
        model_id = "hf-internal-testing/Mixtral-tiny"
        dummy_input = torch.LongTensor([[0, 1, 0], [0, 1, 0]]).to(torch_device)
@@ -507,7 +508,7 @@ class MixtralIntegrationTest(unittest.TestCase):
        )

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    def test_small_model_logits_batched(self):
        model_id = "hf-internal-testing/Mixtral-tiny"
        dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
--- a/tests/models/nemotron/test_modeling_nemotron.py
+++ b/tests/models/nemotron/test_modeling_nemotron.py
@@ -26,6 +26,7 @@ from transformers.testing_utils import (
    require_flash_attn,
    require_read_token,
    require_torch,
+    require_torch_accelerator,
    require_torch_gpu,
    require_torch_sdpa,
    slow,
@@ -103,7 +104,7 @@ class NemotronModelTest(GemmaModelTest):
        pass

    @require_torch_sdpa
-    @require_torch_gpu
+    @require_torch_accelerator
    @slow
    def test_sdpa_equivalence(self):
        for model_class in self.all_model_classes:
--- a/tests/models/omdet_turbo/test_modeling_omdet_turbo.py
+++ b/tests/models/omdet_turbo/test_modeling_omdet_turbo.py
@@ -26,7 +26,7 @@ from transformers.file_utils import cached_property
 from transformers.testing_utils import (
    require_timm,
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
    require_vision,
    slow,
    torch_device,
@@ -865,7 +865,7 @@ class OmDetTurboModelIntegrationTests(unittest.TestCase):
        ]
        self.assertListEqual([result["classes"] for result in results], expected_classes)

-    @require_torch_gpu
+    @require_torch_accelerator
    def test_inference_object_detection_head_equivalence_cpu_gpu(self):
        processor = self.default_processor
        image = prepare_img()
@@ -878,8 +878,8 @@ class OmDetTurboModelIntegrationTests(unittest.TestCase):
            cpu_outputs = model(**encoding)

        # 2. run model on GPU
-        model.to("cuda")
-        encoding = encoding.to("cuda")
+        model.to(torch_device)
+        encoding = encoding.to(torch_device)
        with torch.no_grad():
            gpu_outputs = model(**encoding)

--- a/tests/models/rt_detr/test_modeling_rt_detr.py
+++ b/tests/models/rt_detr/test_modeling_rt_detr.py
@@ -28,7 +28,13 @@ from transformers import (
    is_torch_available,
    is_vision_available,
 )
-from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
+from transformers.testing_utils import (
+    require_torch,
+    require_torch_accelerator,
+    require_vision,
+    slow,
+    torch_device,
+)
 from transformers.utils import cached_property

 from ...test_configuration_common import ConfigTester
@@ -631,7 +637,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        self.assertTrue(not failed_cases, message)

    @parameterized.expand(["float32", "float16", "bfloat16"])
-    @require_torch_gpu
+    @require_torch_accelerator
    @slow
    def test_inference_with_different_dtypes(self, torch_dtype_str):
        torch_dtype = {
@@ -653,7 +659,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
                _ = model(**self._prepare_for_class(inputs_dict, model_class))

    @parameterized.expand(["float32", "float16", "bfloat16"])
-    @require_torch_gpu
+    @require_torch_accelerator
    @slow
    def test_inference_equivalence_for_static_and_dynamic_anchors(self, torch_dtype_str):
        torch_dtype = {
--- a/tests/models/starcoder2/test_modeling_starcoder2.py
+++ b/tests/models/starcoder2/test_modeling_starcoder2.py
@@ -23,6 +23,7 @@ from transformers.testing_utils import (
    require_bitsandbytes,
    require_flash_attn,
    require_torch,
+    require_torch_accelerator,
    require_torch_gpu,
    slow,
    torch_device,
@@ -412,7 +413,7 @@ class Starcoder2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste


@slow
-@require_torch_gpu
+@require_torch_accelerator
 class Starcoder2IntegrationTest(unittest.TestCase):
    def test_starcoder2_batched_generation_sdpa(self):
        EXPECTED_TEXT = [
--- a/tests/models/t5/test_modeling_t5.py
+++ b/tests/models/t5/test_modeling_t5.py
@@ -27,7 +27,7 @@ from transformers.testing_utils import (
    require_sentencepiece,
    require_tokenizers,
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
    slow,
    torch_device,
 )
@@ -1646,7 +1646,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
        )

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    def test_compile_static_cache(self):
        NUM_TOKENS_TO_GENERATE = 40
        EXPECTED_TEXT_COMPLETION = [
@@ -1686,7 +1686,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
        self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)

    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    def test_compile_static_cache_encoder(self):
        prompts = [
            "summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "