From 037755ed54208eefa77673b0af2a0b13e51f2fb1 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 3 Jul 2025 22:45:30 +0200 Subject: [PATCH] Update expected values (after switching to A10) - part 6 (#39207) * fix * fix --------- Co-authored-by: ydshieh --- tests/models/aria/test_modeling_aria.py | 16 +++- tests/models/gemma/test_modeling_gemma.py | 90 ++++++++++++++++++----- 2 files changed, 85 insertions(+), 21 deletions(-) diff --git a/tests/models/aria/test_modeling_aria.py b/tests/models/aria/test_modeling_aria.py index cdab28a3a7..36ec831ddb 100644 --- a/tests/models/aria/test_modeling_aria.py +++ b/tests/models/aria/test_modeling_aria.py @@ -13,7 +13,6 @@ # limitations under the License. """Testing suite for the PyTorch Aria model.""" -import gc import unittest import requests @@ -32,7 +31,7 @@ from transformers import ( from transformers.models.idefics3 import Idefics3VisionConfig from transformers.testing_utils import ( Expectations, - backend_empty_cache, + cleanup, require_bitsandbytes, require_torch, require_torch_large_accelerator, @@ -252,14 +251,23 @@ class AriaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMi pass +SKIP = False +torch_accelerator_module = getattr(torch, torch_device) +memory = 23 # skip on T4 / A10 +if hasattr(torch_accelerator_module, "get_device_properties"): + if torch_accelerator_module.get_device_properties(0).total_memory / 1024**3 < memory: + SKIP = True + + +@unittest.skipIf(SKIP, reason="A10 doesn't have enough GPU memory for this tests") @require_torch class AriaForConditionalGenerationIntegrationTest(unittest.TestCase): def setUp(self): self.processor = AutoProcessor.from_pretrained("rhymes-ai/Aria") + cleanup(torch_device, gc_collect=True) def tearDown(self): - gc.collect() - backend_empty_cache(torch_device) + cleanup(torch_device, gc_collect=True) @slow @require_torch_large_accelerator diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py index 6d863eaf58..f8bc302a45 100644 --- a/tests/models/gemma/test_modeling_gemma.py +++ b/tests/models/gemma/test_modeling_gemma.py @@ -115,9 +115,12 @@ class GemmaIntegrationTest(unittest.TestCase): def setUpClass(cls): cls.device_properties = get_device_properties() + def setUp(self): + cleanup(torch_device, gc_collect=True) + def tearDown(self): # See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed. - cleanup(torch_device, gc_collect=False) + cleanup(torch_device, gc_collect=True) @require_read_token def test_model_2b_fp16(self): @@ -276,7 +279,7 @@ class GemmaIntegrationTest(unittest.TestCase): EXPECTED_TEXTS = Expectations( { ("cuda", 7): ["""Hello I am doing a project on a 1991 240sx and I am trying to find""", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",], - ("cuda", 8): ["Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",], + ("cuda", 8): ['Hello I am doing a project for my school and I am trying to make a game in which you have to get a', 'Hi today I am going to show you how to make a very simple and easy to make a very simple and'], ("rocm", 9): ["Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",], } ) @@ -298,10 +301,20 @@ class GemmaIntegrationTest(unittest.TestCase): self.skipTest("This test is failing (`torch.compile` fails) on Nvidia T4 GPU (OOM).") model_id = "google/gemma-7b" - EXPECTED_TEXTS = [ - """Hello I am doing a project on a 1999 4.0L 4x4. I""", - "Hi today I am going to show you how to make a simple and easy to make a DIY 3D", - ] + + expectations = Expectations( + { + (None, None): [ + "Hello I am doing a project on a 1999 4.0L 4x4. I", + "Hi today I am going to show you how to make a simple and easy to make a DIY 3D", + ], + ("cuda", 8): [ + "Hello I am doing a project on a 1995 3000gt SL. I have a", + "Hi today I am going to show you how to make a simple and easy to make a DIY 3D", + ], + } + ) + EXPECTED_TEXTS = expectations.get_expectation() model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device) @@ -317,10 +330,20 @@ class GemmaIntegrationTest(unittest.TestCase): @require_read_token def test_model_7b_4bit(self): model_id = "google/gemma-7b" - EXPECTED_TEXTS = [ - "Hello I am doing a project for my school and I am trying to make a program that will take a number and then", - "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very", - ] + + expectations = Expectations( + { + (None, None): [ + "Hello I am doing a project for my school and I am trying to make a program that will take a number and then", + "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very", + ], + ("cuda", 8): [ + "Hello I am doing a project for my school and I am trying to make a program that will take a number and then", + 'Hi today I am going to talk about the new update for the game called "The new update!:)!:)!:)', + ], + } + ) + EXPECTED_TEXTS = expectations.get_expectation() model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True) @@ -382,9 +405,19 @@ class GemmaIntegrationTest(unittest.TestCase): ) tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="", padding_side="right") - EXPECTED_TEXT_COMPLETION = [ - "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found", - ] + + expectations = Expectations( + { + (None, None): [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found" + ], + ("cuda", 8): [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have been looking on the internet and I have" + ], + } + ) + EXPECTED_TEXT_COMPLETION = expectations.get_expectation() + max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[ "input_ids" ].shape[-1] @@ -432,15 +465,38 @@ class GemmaIntegrationTest(unittest.TestCase): exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens ) ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True) + + # After switching to A10 on 2025/06/29, we get slightly different outputs when using export + expectations = Expectations( + { + (None, None): [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found" + ], + ("cuda", 8): [ + "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found" + ], + } + ) + EXPECTED_TEXT_COMPLETION = expectations.get_expectation() + self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text) def test_model_2b_bf16_dola(self): model_id = "google/gemma-2b" # ground truth text generated with dola_layers="low", repetition_penalty=1.2 - EXPECTED_TEXTS = [ - "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale", - "Hi today we have the review for a 2016/2017 season of", - ] + expectations = Expectations( + { + (None, None): [ + "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale", + "Hi today we have the review for a 2016/2017 season of", + ], + ("cuda", 8): [ + "Hello I am doing an experiment and need to get the mass of a block. The only tool I have is a scale", + "Hi today we have the review for a 2016/2017 season of", + ], + } + ) + EXPECTED_TEXTS = expectations.get_expectation() model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)