Tests: remove cuda versions when the result is the same 🧹🧹 (#31955)

remove cuda versions when the result is the same
2024-07-16 16:49:54 +01:00
parent 693cb828ff
commit 999981daf4
3 changed files with 52 additions and 155 deletions
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -738,32 +738,13 @@ class LlamaIntegrationTest(unittest.TestCase):
        NUM_TOKENS_TO_GENERATE = 40
        # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
        # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
-        #
-        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
-        #
-        # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
-        # considering differences in hardware processing and potential deviations in generated text.
-        EXPECTED_TEXT_COMPLETION = {
-            8: [
-                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
-                "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
-                "theory of relativ",
-                "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
-                "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
-            ],
-            7: [
-                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe theory of relativ",
-                "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
-            ],
-            9: [
-                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
-                " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
-                "theory of relativ",
-                "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
-                " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
-            ],
-        }
-        expected_text_completion_idx = 8
+        EXPECTED_TEXT_COMPLETION = [
+            "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
+            "reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
+            "theory of relativ",
+            "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
+            "my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
+        ]

        prompts = [
            "Simply put, the theory of relativity states that ",
@@ -778,16 +759,14 @@ class LlamaIntegrationTest(unittest.TestCase):
        # Dynamic Cache
        generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
        dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-        self.assertEqual(
-            EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
-        )  # Both GPU architectures have the same output
+        self.assertEqual(EXPECTED_TEXT_COMPLETION, dynamic_text)

        # Static Cache
        generated_ids = model.generate(
            **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
        )
        static_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-        self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_text)
+        self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)

        # Static Cache + compile
        model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
@@ -795,7 +774,7 @@ class LlamaIntegrationTest(unittest.TestCase):
            **inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
        )
        static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-        self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text)
+        self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)


@slow