CI: AMD MI300 tests fix (#30797)

* add fix

* update import

* updated dicts and comments

* remove prints

* Update testing_utils.py
This commit is contained in:
Mohit Sharma
2024-05-21 17:16:07 +05:30
committed by GitHub
parent a755745546
commit 7a4792e6b3
10 changed files with 126 additions and 10 deletions

View File

@@ -715,6 +715,11 @@ class LlamaIntegrationTest(unittest.TestCase):
NUM_TOKENS_TO_GENERATE = 40
# Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
# was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
#
# Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
#
# Note: Key 9 currently holds the MI300 output. It may need adjusting if H100s are added under the same key,
# since hardware differences can cause the generated text to deviate.
EXPECTED_TEXT_COMPLETION = {
8: [
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
@@ -730,7 +735,15 @@ class LlamaIntegrationTest(unittest.TestCase):
"My favorite all time favorite condiment is ketchup. I love it on hamburgers, hot dogs, fries, eggs, "
"and even on a good old fashioned cheeseburger. I love it on everything. I love it so",
],
9: [
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
" reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
"theory of relativ",
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
" my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
],
}
expected_text_completion_idx = 8
prompts = [
"Simply put, the theory of relativity states that ",
@@ -745,7 +758,9 @@ class LlamaIntegrationTest(unittest.TestCase):
# Dynamic Cache
generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT_COMPLETION[8], dynamic_text) # Both GPU architectures have the same output
self.assertEqual(
EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
) # Both GPU architectures have the same output
# Static Cache
generated_ids = model.generate(