Tests: remove cuda versions when the result is the same 🧹🧹 (#31955)
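Remove the expected outputs keyed by CUDA compute capability major version when the result is the same for every version, keeping a single shared expected output instead.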
This commit is contained in:
@@ -738,32 +738,13 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
NUM_TOKENS_TO_GENERATE = 40
|
||||
# Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test
|
||||
# was changed to have a cache of 53 tokens (as opposed to 4096), on Ampere GPUs.
|
||||
#
|
||||
# Key 9 for MI300, Key 8 for A100/A10, and Key 7 for T4.
|
||||
#
|
||||
# Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
|
||||
# considering differences in hardware processing and potential deviations in generated text.
|
||||
EXPECTED_TEXT_COMPLETION = {
|
||||
8: [
|
||||
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
|
||||
"reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
|
||||
"theory of relativ",
|
||||
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
|
||||
"my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
|
||||
],
|
||||
7: [
|
||||
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe theory of relativ",
|
||||
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
|
||||
],
|
||||
9: [
|
||||
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial"
|
||||
" reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
|
||||
"theory of relativ",
|
||||
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs,"
|
||||
" my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
|
||||
],
|
||||
}
|
||||
expected_text_completion_idx = 8
|
||||
EXPECTED_TEXT_COMPLETION = [
|
||||
"Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
|
||||
"reference frames, and 2) the laws of physics are the same for all inertial reference frames.\nThe "
|
||||
"theory of relativ",
|
||||
"My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, "
|
||||
"my fries, my chicken, my burgers, my hot dogs, my sandwiches, my salads, my p",
|
||||
]
|
||||
|
||||
prompts = [
|
||||
"Simply put, the theory of relativity states that ",
|
||||
@@ -778,16 +759,14 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
# Dynamic Cache
|
||||
generated_ids = model.generate(**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False)
|
||||
dynamic_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
self.assertEqual(
|
||||
EXPECTED_TEXT_COMPLETION[expected_text_completion_idx], dynamic_text
|
||||
) # Both GPU architectures have the same output
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, dynamic_text)
|
||||
|
||||
# Static Cache
|
||||
generated_ids = model.generate(
|
||||
**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
|
||||
)
|
||||
static_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_text)
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)
|
||||
|
||||
# Static Cache + compile
|
||||
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
||||
@@ -795,7 +774,7 @@ class LlamaIntegrationTest(unittest.TestCase):
|
||||
**inputs, max_new_tokens=NUM_TOKENS_TO_GENERATE, do_sample=False, cache_implementation="static"
|
||||
)
|
||||
static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text)
|
||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)
|
||||
|
||||
|
||||
@slow
|
||||
|
||||
Reference in New Issue
Block a user