Update expected values (after switching to A10) - part 6 (#39207)
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -13,7 +13,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Testing suite for the PyTorch Aria model."""
|
"""Testing suite for the PyTorch Aria model."""
|
||||||
|
|
||||||
import gc
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -32,7 +31,7 @@ from transformers import (
|
|||||||
from transformers.models.idefics3 import Idefics3VisionConfig
|
from transformers.models.idefics3 import Idefics3VisionConfig
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
Expectations,
|
Expectations,
|
||||||
backend_empty_cache,
|
cleanup,
|
||||||
require_bitsandbytes,
|
require_bitsandbytes,
|
||||||
require_torch,
|
require_torch,
|
||||||
require_torch_large_accelerator,
|
require_torch_large_accelerator,
|
||||||
@@ -252,14 +251,23 @@ class AriaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMi
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
SKIP = False
|
||||||
|
torch_accelerator_module = getattr(torch, torch_device)
|
||||||
|
memory = 23 # skip on T4 / A10
|
||||||
|
if hasattr(torch_accelerator_module, "get_device_properties"):
|
||||||
|
if torch_accelerator_module.get_device_properties(0).total_memory / 1024**3 < memory:
|
||||||
|
SKIP = True
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipIf(SKIP, reason="A10 doesn't have enough GPU memory for this tests")
|
||||||
@require_torch
|
@require_torch
|
||||||
class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
|
self.processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
|
||||||
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
gc.collect()
|
cleanup(torch_device, gc_collect=True)
|
||||||
backend_empty_cache(torch_device)
|
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_large_accelerator
|
@require_torch_large_accelerator
|
||||||
|
|||||||
@@ -115,9 +115,12 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.device_properties = get_device_properties()
|
cls.device_properties = get_device_properties()
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
# See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed.
|
# See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed.
|
||||||
cleanup(torch_device, gc_collect=False)
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
@require_read_token
|
@require_read_token
|
||||||
def test_model_2b_fp16(self):
|
def test_model_2b_fp16(self):
|
||||||
@@ -276,7 +279,7 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
EXPECTED_TEXTS = Expectations(
|
EXPECTED_TEXTS = Expectations(
|
||||||
{
|
{
|
||||||
("cuda", 7): ["""Hello I am doing a project on a 1991 240sx and I am trying to find""", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
|
("cuda", 7): ["""Hello I am doing a project on a 1991 240sx and I am trying to find""", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
|
||||||
("cuda", 8): ["Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
|
("cuda", 8): ['Hello I am doing a project for my school and I am trying to make a game in which you have to get a', 'Hi today I am going to show you how to make a very simple and easy to make a very simple and'],
|
||||||
("rocm", 9): ["Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",],
|
("rocm", 9): ["Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -298,10 +301,20 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
self.skipTest("This test is failing (`torch.compile` fails) on Nvidia T4 GPU (OOM).")
|
self.skipTest("This test is failing (`torch.compile` fails) on Nvidia T4 GPU (OOM).")
|
||||||
|
|
||||||
model_id = "google/gemma-7b"
|
model_id = "google/gemma-7b"
|
||||||
EXPECTED_TEXTS = [
|
|
||||||
"""Hello I am doing a project on a 1999 4.0L 4x4. I""",
|
expectations = Expectations(
|
||||||
"Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
|
{
|
||||||
]
|
(None, None): [
|
||||||
|
"Hello I am doing a project on a 1999 4.0L 4x4. I",
|
||||||
|
"Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
|
||||||
|
],
|
||||||
|
("cuda", 8): [
|
||||||
|
"Hello I am doing a project on a 1995 3000gt SL. I have a",
|
||||||
|
"Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_TEXTS = expectations.get_expectation()
|
||||||
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
|
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
|
||||||
|
|
||||||
@@ -317,10 +330,20 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
@require_read_token
|
@require_read_token
|
||||||
def test_model_7b_4bit(self):
|
def test_model_7b_4bit(self):
|
||||||
model_id = "google/gemma-7b"
|
model_id = "google/gemma-7b"
|
||||||
EXPECTED_TEXTS = [
|
|
||||||
"Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
|
expectations = Expectations(
|
||||||
"Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
|
{
|
||||||
]
|
(None, None): [
|
||||||
|
"Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
|
||||||
|
"Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
|
||||||
|
],
|
||||||
|
("cuda", 8): [
|
||||||
|
"Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
|
||||||
|
'Hi today I am going to talk about the new update for the game called "The new update!:)!:)!:)',
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_TEXTS = expectations.get_expectation()
|
||||||
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
|
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
|
||||||
|
|
||||||
@@ -382,9 +405,19 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
|
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
|
||||||
EXPECTED_TEXT_COMPLETION = [
|
|
||||||
"Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
|
expectations = Expectations(
|
||||||
]
|
{
|
||||||
|
(None, None): [
|
||||||
|
"Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
|
||||||
|
],
|
||||||
|
("cuda", 8): [
|
||||||
|
"Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have been looking on the internet and I have"
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_TEXT_COMPLETION = expectations.get_expectation()
|
||||||
|
|
||||||
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
|
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
|
||||||
"input_ids"
|
"input_ids"
|
||||||
].shape[-1]
|
].shape[-1]
|
||||||
@@ -432,15 +465,38 @@ class GemmaIntegrationTest(unittest.TestCase):
|
|||||||
exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
|
exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
|
||||||
)
|
)
|
||||||
ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True)
|
ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True)
|
||||||
|
|
||||||
|
# After switching to A10 on 2025/06/29, we get slightly different outputs when using export
|
||||||
|
expectations = Expectations(
|
||||||
|
{
|
||||||
|
(None, None): [
|
||||||
|
"Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
|
||||||
|
],
|
||||||
|
("cuda", 8): [
|
||||||
|
"Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_TEXT_COMPLETION = expectations.get_expectation()
|
||||||
|
|
||||||
self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text)
|
self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text)
|
||||||
|
|
||||||
def test_model_2b_bf16_dola(self):
|
def test_model_2b_bf16_dola(self):
|
||||||
model_id = "google/gemma-2b"
|
model_id = "google/gemma-2b"
|
||||||
# ground truth text generated with dola_layers="low", repetition_penalty=1.2
|
# ground truth text generated with dola_layers="low", repetition_penalty=1.2
|
||||||
EXPECTED_TEXTS = [
|
expectations = Expectations(
|
||||||
"Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
|
{
|
||||||
"Hi today we have the review for a <strong>2016/2017</strong> season of",
|
(None, None): [
|
||||||
]
|
"Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
|
||||||
|
"Hi today we have the review for a <strong>2016/2017</strong> season of",
|
||||||
|
],
|
||||||
|
("cuda", 8): [
|
||||||
|
"Hello I am doing an experiment and need to get the mass of a block. The only tool I have is a scale",
|
||||||
|
"Hi today we have the review for a <strong>2016/2017</strong> season of",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_TEXTS = expectations.get_expectation()
|
||||||
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
|
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user