Update expected values (after switching to A10) - part 7 (#39218)

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2025-07-04 12:48:10 +02:00
committed by GitHub
parent 0cf27916f0
commit cd8a041a4f
4 changed files with 60 additions and 14 deletions

View File

@@ -165,7 +165,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
EXPECTED_TEXTS = Expectations( EXPECTED_TEXTS = Expectations(
{ {
("xpu", 3): ["<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"], ("xpu", 3): ["<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
("cuda", 7): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n",], (None, None): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
("cuda", 8): ['<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the', "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
} }
) )
EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation() EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()
@@ -238,7 +239,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
EXPECTED_TEXT_COMPLETIONS = Expectations( EXPECTED_TEXT_COMPLETIONS = Expectations(
{ {
("xpu", 3): ["Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-"], ("xpu", 3): ["Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-"],
("cuda", 7): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship",], (None, None): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship"],
("cuda", 8): ['Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-'],
} }
) )
EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation() EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation()
@@ -290,24 +292,31 @@ class Cohere2IntegrationTest(unittest.TestCase):
if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available(): if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
self.skipTest("FlashAttention2 is required for this test.") self.skipTest("FlashAttention2 is required for this test.")
# TODO: check whether we can specify not to compile when `flex` attention is used.
if attn_implementation == "flex_attention":
self.skipTest(
"Flex attention will compile (see `compile_friendly_flex_attention`) which causes triton issue."
)
if torch_device == "xpu" and attn_implementation == "flash_attention_2": if torch_device == "xpu" and attn_implementation == "flash_attention_2":
self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.") self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.")
model_id = "CohereForAI/c4ai-command-r7b-12-2024" model_id = "CohereForAI/c4ai-command-r7b-12-2024"
EXPECTED_COMPLETIONS = [ EXPECTED_COMPLETIONS = [
" the mountains, the lakes, the rivers, the waterfalls, the waterfalls, the waterfalls, the waterfalls", " the mountains, the lakes, the rivers, the forests, the trees, the birds, the animals",
", green, yellow, orange, purple, pink, brown, black, white, grey, silver", ", green, yellow, orange, purple, pink, brown, black, white, grey, silver",
] ]
input_text = [ input_text = [
"This is a nice place. " * 800 + "I really enjoy the scenery,", # This is larger than 4096 tokens "This is a nice place. " * 200 + "I really enjoy the scenery,", # This is larger than 1024 tokens
"A list of colors: red, blue", # This will almost all be padding tokens "A list of colors: red, blue", # This will almost all be padding tokens
] ]
tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left") tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left")
inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device) inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)
# We use `sliding_window=1024` instead of the original value `4096` in the config to avoid GPU OOM
model = AutoModelForCausalLM.from_pretrained( model = AutoModelForCausalLM.from_pretrained(
model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16 model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16, sliding_window=1024
).to(torch_device) ).to(torch_device)
# Make sure prefill is larger than sliding window # Make sure prefill is larger than sliding window

View File

@@ -211,6 +211,12 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
def test_config(self): def test_config(self):
self.config_tester.run_common_tests() self.config_tester.run_common_tests()
@unittest.skip(
reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test pass."
)
def test_sdpa_can_compile_dynamic(self):
pass
@unittest.skip(reason="Data2VecVision does not use inputs_embeds") @unittest.skip(reason="Data2VecVision does not use inputs_embeds")
def test_inputs_embeds(self): def test_inputs_embeds(self):
pass pass

View File

@@ -220,6 +220,10 @@ class DepthProModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def test_config(self): def test_config(self):
self.config_tester.run_common_tests() self.config_tester.run_common_tests()
@unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined")
def test_sdpa_can_compile_dynamic(self):
pass
@unittest.skip(reason="DepthPro does not use inputs_embeds") @unittest.skip(reason="DepthPro does not use inputs_embeds")
def test_inputs_embeds(self): def test_inputs_embeds(self):
pass pass

View File

@@ -22,6 +22,8 @@ from transformers import Olmo2Config, is_torch_available, set_seed
from transformers.generation.configuration_utils import GenerationConfig from transformers.generation.configuration_utils import GenerationConfig
from transformers.models.auto.tokenization_auto import AutoTokenizer from transformers.models.auto.tokenization_auto import AutoTokenizer
from transformers.testing_utils import ( from transformers.testing_utils import (
Expectations,
cleanup,
require_tokenizers, require_tokenizers,
require_torch, require_torch,
slow, slow,
@@ -232,30 +234,55 @@ class Olmo2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
@require_torch @require_torch
class Olmo2IntegrationTest(unittest.TestCase): class Olmo2IntegrationTest(unittest.TestCase):
def setUp(self):
cleanup(torch_device, gc_collect=True)
def tearDown(self):
cleanup(torch_device, gc_collect=True)
@slow @slow
def test_model_1b_logits_bfloat16(self): def test_model_1b_logits_bfloat16(self):
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
model = Olmo2ForCausalLM.from_pretrained("allenai/OLMo-2-0425-1B").to(torch.bfloat16) model = Olmo2ForCausalLM.from_pretrained("allenai/OLMo-2-0425-1B").to(torch_device, torch.bfloat16)
out = model(torch.tensor(input_ids)).logits.float() out = model(torch.tensor(input_ids, device=torch_device)).logits.float()
# Expected mean on dim = -1 # Expected mean on dim = -1
EXPECTED_MEAN = torch.tensor([[-5.7094, -6.5548, -3.2527, -2.7847, -5.5092, -4.5223, -4.8427, -4.6867]]) expectations = Expectations(
{
("cuda", 8): [[-5.6700, -6.5557, -3.1545, -2.7418, -5.5887, -4.5179, -4.9077, -4.6530]],
}
)
EXPECTED_MEAN = torch.tensor(expectations.get_expectation(), device=torch_device)
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
# slicing logits[0, 0, 0:30] # slicing logits[0, 0, 0:30]
EXPECTED_SLICE = torch.tensor([2.4531, -5.7188, -5.1562, -4.8750, -6.7812, -4.0625, -4.4375, -4.5938, -7.5938, -5.0938, -3.9375, -3.6875, -5.0938, -3.1875, -5.6875, 0.2266, 1.2578, 1.1016, 0.8945, 0.4785, 0.2256, -0.3613, -0.4258, 0.1377, -0.1104, -7.1875, -5.2188, -6.8125, -0.9062, -2.9062]) # fmt: skip expectations = Expectations(
{
("cuda", 8): [2.65625, -5.25, -4.9375, -4.53125, -6.5, -3.828125, -4.15625, -4.1875, -7.0625, -4.71875, -3.609375, -3.09375, -4.59375, -2.640625, -5.25, 0.39453125, 1.3828125, 1.2265625, 1.0078125, 0.57421875, 0.330078125, -0.287109375, -0.3671875, 0.1943359375, -0.0732421875, -6.6875, -4.75, -6.4375, -0.625, -2.625],
}
) # fmt: skip
EXPECTED_SLICE = torch.tensor(expectations.get_expectation(), device=torch_device)
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2) torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
@slow @slow
def test_model_7b_logits(self): def test_model_7b_logits(self):
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]] input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
model = Olmo2ForCausalLM.from_pretrained("shanearora/OLMo2-7B-1124-hf", device_map="auto") model = Olmo2ForCausalLM.from_pretrained("shanearora/OLMo2-7B-1124-hf").to(torch_device, dtype=torch.bfloat16)
out = model(torch.tensor(input_ids)).logits.float() out = model(torch.tensor(input_ids, device=torch_device)).logits.float()
# Expected mean on dim = -1 # Expected mean on dim = -1
EXPECTED_MEAN = torch.tensor( expectations = Expectations(
[[-13.0244, -13.9564, -11.8270, -11.3047, -12.3794, -12.4215, -15.6030, -12.7962]] {
("cuda", 8): [[-13.0518, -13.8897, -11.7999, -11.3222, -12.3441, -12.3884, -15.4874, -12.7365]],
}
) )
EXPECTED_MEAN = torch.tensor(expectations.get_expectation(), device=torch_device)
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2) torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
# slicing logits[0, 0, 0:30] # slicing logits[0, 0, 0:30]
EXPECTED_SLICE = torch.tensor([-5.3909, -13.9841, -13.6123, -14.5780, -13.9455, -13.2265, -13.4734, -11.9079, -9.2879, -12.6139, -11.4819, -5.9607, -11.9657, -6.3618, -11.1065, -7.3075, -6.5674, -6.7154, -7.3409, -7.9662, -8.0863, -8.1682, -8.7341, -8.7665, -8.8742, -9.7813, -8.0620, -12.5937, -7.6440, -11.3966]) # fmt: skip expectations = Expectations(
{
("cuda", 8): [-5.5, -14.4375, -13.8125, -14.875, -14.125, -13.4375, -13.8125, -12.25, -9.5, -12.9375, -11.6875, -6.09375, -12.1875, -6.5, -11.3125, -7.34375, -6.5625, -6.71875, -7.375, -7.96875, -8.0625, -8.1875, -8.75, -8.75, -8.875, -9.9375, -8.1875, -12.875, -7.84375, -11.625],
}
) # fmt: skip
EXPECTED_SLICE = torch.tensor(expectations.get_expectation(), device=torch_device)
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2) torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
@slow @slow