Update expected values (after switching to A10) - part 7 (#39218)
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -165,7 +165,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
|
|||||||
EXPECTED_TEXTS = Expectations(
|
EXPECTED_TEXTS = Expectations(
|
||||||
{
|
{
|
||||||
("xpu", 3): ["<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
|
("xpu", 3): ["<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
|
||||||
("cuda", 7): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n",],
|
(None, None): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
|
||||||
|
("cuda", 8): ['<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the', "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()
|
EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()
|
||||||
@@ -238,7 +239,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
|
|||||||
EXPECTED_TEXT_COMPLETIONS = Expectations(
|
EXPECTED_TEXT_COMPLETIONS = Expectations(
|
||||||
{
|
{
|
||||||
("xpu", 3): ["Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-"],
|
("xpu", 3): ["Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-"],
|
||||||
("cuda", 7): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship",],
|
(None, None): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship"],
|
||||||
|
("cuda", 8): ['Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-'],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation()
|
EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation()
|
||||||
@@ -290,24 +292,31 @@ class Cohere2IntegrationTest(unittest.TestCase):
|
|||||||
if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
|
if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
|
||||||
self.skipTest("FlashAttention2 is required for this test.")
|
self.skipTest("FlashAttention2 is required for this test.")
|
||||||
|
|
||||||
|
# TODO: check whether we can specify not to compile when `flex` attention is used.
|
||||||
|
if attn_implementation == "flex_attention":
|
||||||
|
self.skipTest(
|
||||||
|
"Flex attention will compile (see `compile_friendly_flex_attention`) which causes triton issue."
|
||||||
|
)
|
||||||
|
|
||||||
if torch_device == "xpu" and attn_implementation == "flash_attention_2":
|
if torch_device == "xpu" and attn_implementation == "flash_attention_2":
|
||||||
self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.")
|
self.skipTest(reason="Intel XPU doesn't support falsh_attention_2 as of now.")
|
||||||
|
|
||||||
model_id = "CohereForAI/c4ai-command-r7b-12-2024"
|
model_id = "CohereForAI/c4ai-command-r7b-12-2024"
|
||||||
EXPECTED_COMPLETIONS = [
|
EXPECTED_COMPLETIONS = [
|
||||||
" the mountains, the lakes, the rivers, the waterfalls, the waterfalls, the waterfalls, the waterfalls",
|
" the mountains, the lakes, the rivers, the forests, the trees, the birds, the animals",
|
||||||
", green, yellow, orange, purple, pink, brown, black, white, grey, silver",
|
", green, yellow, orange, purple, pink, brown, black, white, grey, silver",
|
||||||
]
|
]
|
||||||
|
|
||||||
input_text = [
|
input_text = [
|
||||||
"This is a nice place. " * 800 + "I really enjoy the scenery,", # This is larger than 4096 tokens
|
"This is a nice place. " * 200 + "I really enjoy the scenery,", # This is larger than 1024 tokens
|
||||||
"A list of colors: red, blue", # This will almost all be padding tokens
|
"A list of colors: red, blue", # This will almost all be padding tokens
|
||||||
]
|
]
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left")
|
tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left")
|
||||||
inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)
|
inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)
|
||||||
|
|
||||||
|
# We use `sliding_window=1024` instead of the original value `4096` in the config to avoid GPU OOM
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16
|
model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16, sliding_window=1024
|
||||||
).to(torch_device)
|
).to(torch_device)
|
||||||
|
|
||||||
# Make sure prefill is larger than sliding window
|
# Make sure prefill is larger than sliding window
|
||||||
|
|||||||
@@ -211,6 +211,12 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(
|
||||||
|
reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test pass."
|
||||||
|
)
|
||||||
|
def test_sdpa_can_compile_dynamic(self):
|
||||||
|
pass
|
||||||
|
|
||||||
@unittest.skip(reason="Data2VecVision does not use inputs_embeds")
|
@unittest.skip(reason="Data2VecVision does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -220,6 +220,10 @@ class DepthProModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
|||||||
def test_config(self):
|
def test_config(self):
|
||||||
self.config_tester.run_common_tests()
|
self.config_tester.run_common_tests()
|
||||||
|
|
||||||
|
@unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined")
|
||||||
|
def test_sdpa_can_compile_dynamic(self):
|
||||||
|
pass
|
||||||
|
|
||||||
@unittest.skip(reason="DepthPro does not use inputs_embeds")
|
@unittest.skip(reason="DepthPro does not use inputs_embeds")
|
||||||
def test_inputs_embeds(self):
|
def test_inputs_embeds(self):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ from transformers import Olmo2Config, is_torch_available, set_seed
|
|||||||
from transformers.generation.configuration_utils import GenerationConfig
|
from transformers.generation.configuration_utils import GenerationConfig
|
||||||
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
|
Expectations,
|
||||||
|
cleanup,
|
||||||
require_tokenizers,
|
require_tokenizers,
|
||||||
require_torch,
|
require_torch,
|
||||||
slow,
|
slow,
|
||||||
@@ -232,30 +234,55 @@ class Olmo2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
class Olmo2IntegrationTest(unittest.TestCase):
|
class Olmo2IntegrationTest(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_1b_logits_bfloat16(self):
|
def test_model_1b_logits_bfloat16(self):
|
||||||
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
|
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
|
||||||
model = Olmo2ForCausalLM.from_pretrained("allenai/OLMo-2-0425-1B").to(torch.bfloat16)
|
model = Olmo2ForCausalLM.from_pretrained("allenai/OLMo-2-0425-1B").to(torch_device, torch.bfloat16)
|
||||||
out = model(torch.tensor(input_ids)).logits.float()
|
out = model(torch.tensor(input_ids, device=torch_device)).logits.float()
|
||||||
# Expected mean on dim = -1
|
# Expected mean on dim = -1
|
||||||
EXPECTED_MEAN = torch.tensor([[-5.7094, -6.5548, -3.2527, -2.7847, -5.5092, -4.5223, -4.8427, -4.6867]])
|
expectations = Expectations(
|
||||||
|
{
|
||||||
|
("cuda", 8): [[-5.6700, -6.5557, -3.1545, -2.7418, -5.5887, -4.5179, -4.9077, -4.6530]],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
EXPECTED_MEAN = torch.tensor(expectations.get_expectation(), device=torch_device)
|
||||||
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
|
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
|
||||||
|
|
||||||
# slicing logits[0, 0, 0:30]
|
# slicing logits[0, 0, 0:30]
|
||||||
EXPECTED_SLICE = torch.tensor([2.4531, -5.7188, -5.1562, -4.8750, -6.7812, -4.0625, -4.4375, -4.5938, -7.5938, -5.0938, -3.9375, -3.6875, -5.0938, -3.1875, -5.6875, 0.2266, 1.2578, 1.1016, 0.8945, 0.4785, 0.2256, -0.3613, -0.4258, 0.1377, -0.1104, -7.1875, -5.2188, -6.8125, -0.9062, -2.9062]) # fmt: skip
|
expectations = Expectations(
|
||||||
|
{
|
||||||
|
("cuda", 8): [2.65625, -5.25, -4.9375, -4.53125, -6.5, -3.828125, -4.15625, -4.1875, -7.0625, -4.71875, -3.609375, -3.09375, -4.59375, -2.640625, -5.25, 0.39453125, 1.3828125, 1.2265625, 1.0078125, 0.57421875, 0.330078125, -0.287109375, -0.3671875, 0.1943359375, -0.0732421875, -6.6875, -4.75, -6.4375, -0.625, -2.625],
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
EXPECTED_SLICE = torch.tensor(expectations.get_expectation(), device=torch_device)
|
||||||
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
|
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_7b_logits(self):
|
def test_model_7b_logits(self):
|
||||||
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
|
input_ids = [[1, 306, 4658, 278, 6593, 310, 2834, 338]]
|
||||||
model = Olmo2ForCausalLM.from_pretrained("shanearora/OLMo2-7B-1124-hf", device_map="auto")
|
model = Olmo2ForCausalLM.from_pretrained("shanearora/OLMo2-7B-1124-hf").to(torch_device, dtype=torch.bfloat16)
|
||||||
out = model(torch.tensor(input_ids)).logits.float()
|
out = model(torch.tensor(input_ids, device=torch_device)).logits.float()
|
||||||
# Expected mean on dim = -1
|
# Expected mean on dim = -1
|
||||||
EXPECTED_MEAN = torch.tensor(
|
expectations = Expectations(
|
||||||
[[-13.0244, -13.9564, -11.8270, -11.3047, -12.3794, -12.4215, -15.6030, -12.7962]]
|
{
|
||||||
|
("cuda", 8): [[-13.0518, -13.8897, -11.7999, -11.3222, -12.3441, -12.3884, -15.4874, -12.7365]],
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
EXPECTED_MEAN = torch.tensor(expectations.get_expectation(), device=torch_device)
|
||||||
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
|
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
|
||||||
# slicing logits[0, 0, 0:30]
|
# slicing logits[0, 0, 0:30]
|
||||||
EXPECTED_SLICE = torch.tensor([-5.3909, -13.9841, -13.6123, -14.5780, -13.9455, -13.2265, -13.4734, -11.9079, -9.2879, -12.6139, -11.4819, -5.9607, -11.9657, -6.3618, -11.1065, -7.3075, -6.5674, -6.7154, -7.3409, -7.9662, -8.0863, -8.1682, -8.7341, -8.7665, -8.8742, -9.7813, -8.0620, -12.5937, -7.6440, -11.3966]) # fmt: skip
|
expectations = Expectations(
|
||||||
|
{
|
||||||
|
("cuda", 8): [-5.5, -14.4375, -13.8125, -14.875, -14.125, -13.4375, -13.8125, -12.25, -9.5, -12.9375, -11.6875, -6.09375, -12.1875, -6.5, -11.3125, -7.34375, -6.5625, -6.71875, -7.375, -7.96875, -8.0625, -8.1875, -8.75, -8.75, -8.875, -9.9375, -8.1875, -12.875, -7.84375, -11.625],
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
EXPECTED_SLICE = torch.tensor(expectations.get_expectation(), device=torch_device)
|
||||||
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
|
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, rtol=1e-2, atol=1e-2)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
|
|||||||
Reference in New Issue
Block a user