From 037755ed54208eefa77673b0af2a0b13e51f2fb1 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 3 Jul 2025 22:45:30 +0200
Subject: [PATCH] Update expected values (after switching to A10) - part 6
 (#39207)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 tests/models/aria/test_modeling_aria.py   | 16 +++-
 tests/models/gemma/test_modeling_gemma.py | 90 ++++++++++++++++++-----
 2 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/tests/models/aria/test_modeling_aria.py b/tests/models/aria/test_modeling_aria.py
index cdab28a3a7..36ec831ddb 100644
--- a/tests/models/aria/test_modeling_aria.py
+++ b/tests/models/aria/test_modeling_aria.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 """Testing suite for the PyTorch Aria model."""
 
-import gc
 import unittest
 
 import requests
@@ -32,7 +31,7 @@ from transformers import (
 from transformers.models.idefics3 import Idefics3VisionConfig
 from transformers.testing_utils import (
     Expectations,
-    backend_empty_cache,
+    cleanup,
     require_bitsandbytes,
     require_torch,
     require_torch_large_accelerator,
@@ -252,14 +251,23 @@ class AriaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMi
         pass
 
 
+SKIP = False
+torch_accelerator_module = getattr(torch, torch_device)
+memory = 23  # minimum total GPU memory in GiB; smaller devices (e.g. T4 / A10) are skipped
+if hasattr(torch_accelerator_module, "get_device_properties"):
+    if torch_accelerator_module.get_device_properties(0).total_memory / 1024**3 < memory:
+        SKIP = True
+
+
+@unittest.skipIf(SKIP, reason="A10 doesn't have enough GPU memory for this tests")
 @require_torch
 class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
     def setUp(self):
         self.processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
+        cleanup(torch_device, gc_collect=True)
 
     def tearDown(self):
-        gc.collect()
-        backend_empty_cache(torch_device)
+        cleanup(torch_device, gc_collect=True)
 
     @slow
     @require_torch_large_accelerator
diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py
index 6d863eaf58..f8bc302a45 100644
--- a/tests/models/gemma/test_modeling_gemma.py
+++ b/tests/models/gemma/test_modeling_gemma.py
@@ -115,9 +115,12 @@ class GemmaIntegrationTest(unittest.TestCase):
     def setUpClass(cls):
         cls.device_properties = get_device_properties()
 
+    def setUp(self):
+        cleanup(torch_device, gc_collect=True)
+
     def tearDown(self):
         # See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed.
-        cleanup(torch_device, gc_collect=False)
+        cleanup(torch_device, gc_collect=True)
 
     @require_read_token
     def test_model_2b_fp16(self):
@@ -276,7 +279,7 @@ class GemmaIntegrationTest(unittest.TestCase):
         EXPECTED_TEXTS = Expectations(
             {
                 ("cuda", 7): ["""Hello I am doing a project on a 1991 240sx and I am trying to find""", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
-                ("cuda", 8): ["Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
+                ("cuda", 8): ['Hello I am doing a project for my school and I am trying to make a game in which you have to get a', 'Hi today I am going to show you how to make a very simple and easy to make a very simple and'],
                 ("rocm", 9): ["Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",],
             }
         )
@@ -298,10 +301,20 @@ class GemmaIntegrationTest(unittest.TestCase):
             self.skipTest("This test is failing (`torch.compile` fails) on Nvidia T4 GPU (OOM).")
 
         model_id = "google/gemma-7b"
-        EXPECTED_TEXTS = [
-            """Hello I am doing a project on a 1999 4.0L 4x4. I""",
-            "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
-        ]
+
+        expectations = Expectations(
+            {
+                (None, None): [
+                    "Hello I am doing a project on a 1999 4.0L 4x4. I",
+                    "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
+                ],
+                ("cuda", 8): [
+                    "Hello I am doing a project on a 1995 3000gt SL. I have a",
+                    "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
+                ],
+            }
+        )
+        EXPECTED_TEXTS = expectations.get_expectation()
 
         model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
 
@@ -317,10 +330,20 @@ class GemmaIntegrationTest(unittest.TestCase):
     @require_read_token
     def test_model_7b_4bit(self):
         model_id = "google/gemma-7b"
-        EXPECTED_TEXTS = [
-            "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
-            "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
-        ]
+
+        expectations = Expectations(
+            {
+                (None, None): [
+                    "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
+                    "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
+                ],
+                ("cuda", 8): [
+                    "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
+                    'Hi today I am going to talk about the new update for the game called "The new update!:)!:)!:)',
+                ],
+            }
+        )
+        EXPECTED_TEXTS = expectations.get_expectation()
 
         model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
 
@@ -382,9 +405,19 @@ class GemmaIntegrationTest(unittest.TestCase):
         )
 
         tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
-        EXPECTED_TEXT_COMPLETION = [
-            "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
-        ]
+
+        expectations = Expectations(
+            {
+                (None, None): [
+                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
+                ],
+                ("cuda", 8): [
+                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have been looking on the internet and I have"
+                ],
+            }
+        )
+        EXPECTED_TEXT_COMPLETION = expectations.get_expectation()
+
         max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
             "input_ids"
         ].shape[-1]
@@ -432,15 +465,38 @@ class GemmaIntegrationTest(unittest.TestCase):
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
         )
         ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True)
+
+        # Since the A10 switch (2025/06/29), export output differs slightly from the expectation used above
+        expectations = Expectations(
+            {
+                (None, None): [
+                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
+                ],
+                ("cuda", 8): [
+                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
+                ],
+            }
+        )
+        EXPECTED_TEXT_COMPLETION = expectations.get_expectation()
+
         self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text)
 
     def test_model_2b_bf16_dola(self):
         model_id = "google/gemma-2b"
         # ground truth text generated with dola_layers="low", repetition_penalty=1.2
-        EXPECTED_TEXTS = [
-            "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
-            "Hi today we have the review for a <strong>2016/2017</strong> season of",
-        ]
+        expectations = Expectations(
+            {
+                (None, None): [
+                    "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
+                    "Hi today we have the review for a <strong>2016/2017</strong> season of",
+                ],
+                ("cuda", 8): [
+                    "Hello I am doing an experiment and need to get the mass of a block. The only tool I have is a scale",
+                    "Hi today we have the review for a <strong>2016/2017</strong> season of",
+                ],
+            }
+        )
+        EXPECTED_TEXTS = expectations.get_expectation()
 
         model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)