CI: AMD MI300 tests fix (#30797)

* add fix * update import * updated dicts and comments * remove prints * Update testing_utils.py
2024-05-21 17:16:07 +05:30
parent a755745546
commit 7a4792e6b3
10 changed files with 126 additions and 10 deletions
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -715,6 +715,11 @@ class LlamaIntegrationTest(unittest.TestCase):
        NUM_TOKENS_TO_GENERATE = 40
        # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
        # was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
+        #
+        # Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
+        #
+        # Note: Key 9 currently holds the expected output for MI300. It may need adjusting for H100s in the future,
+        # because hardware differences can cause deviations in the generated text.
        EXPECTED_TEXT_COMPLETION = {
            8: [
                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
@@ -730,7 +735,15 @@ class LlamaIntegrationTest(unittest.TestCase):
                "My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
                "and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
            ],
+            9: [
+                "Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
+                " reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
+                "theory of relativ",
+                "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
+                " my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
+            ],
        }
+        expected_text_completion_idx = 8

        prompts = [
            "Simply put, the theory of relativity states that ",
@@ -745,7 +758,9 @@ class LlamaIntegrationTest(unittest.TestCase):
        # Dynamic Cache
        generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
        dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
-        self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text)  # Both GPU architectures have the same output
+        self.assertEqual(
+            EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
+        )  # Both GPU architectures have the same output

        # Static Cache
        generated_ids = model.generate(