Fix Qwen2 failing tests (#34819)
* fix: qwen2 model ids
* fix: line
* fix: more format
* update: reformat
This commit is contained in:
@@ -440,15 +440,15 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
@slow
|
@slow
|
||||||
def test_model_450m_logits(self):
|
def test_model_450m_logits(self):
|
||||||
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
|
input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
|
||||||
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-450m-beta", device_map="auto")
|
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-0.5B", device_map="auto")
|
||||||
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
|
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
out = model(input_ids).logits.float().cpu()
|
out = model(input_ids).logits.float().cpu()
|
||||||
# Expected mean on dim = -1
|
# Expected mean on dim = -1
|
||||||
EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
|
EXPECTED_MEAN = torch.tensor([[-1.9537, -1.6193, -1.4123, -1.4673, -1.8511, -1.9309, -1.9826, -2.1776]])
|
||||||
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
|
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
|
||||||
# slicing logits[0, 0, 0:30]
|
# slicing logits[0, 0, 0:30]
|
||||||
EXPECTED_SLICE = torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]) # fmt: skip
|
EXPECTED_SLICE = torch.tensor([3.2025, 7.1265, 4.6058, 3.6423, 1.6357, 3.9265, 5.1883, 5.8760, 2.7942, 4.4823, 3.2571, 2.1063, 3.4275, 4.2028, 1.9767, 5.2115, 6.6756, 6.3999, 6.0483, 5.7378, 5.6660, 5.2298, 5.4103, 5.1248, 5.4376, 2.4570, 2.6107, 5.4039, 2.8077, 4.7777]) # fmt: skip
|
||||||
print(out[0, 0, :30])
|
print(out[0, 0, :30])
|
||||||
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
|
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
|
||||||
|
|
||||||
@@ -458,10 +458,12 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_model_450m_generation(self):
|
def test_model_450m_generation(self):
|
||||||
EXPECTED_TEXT_COMPLETION = """My favourite condiment is 100% ketchup. I love it on everything. I’m not a big"""
|
EXPECTED_TEXT_COMPLETION = (
|
||||||
|
"""My favourite condiment is 100% natural, organic and vegan. I love to use it in my cooking and I"""
|
||||||
|
)
|
||||||
prompt = "My favourite condiment is "
|
prompt = "My favourite condiment is "
|
||||||
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-450m-beta", use_fast=False)
|
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B", use_fast=False)
|
||||||
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-450m-beta", device_map="auto")
|
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-0.5B", device_map="auto")
|
||||||
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
||||||
|
|
||||||
# greedy generation outputs
|
# greedy generation outputs
|
||||||
@@ -482,7 +484,7 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
# An input with 4097 tokens that is above the size of the sliding window
|
# An input with 4097 tokens that is above the size of the sliding window
|
||||||
input_ids = [1] + [306, 338] * 2048
|
input_ids = [1] + [306, 338] * 2048
|
||||||
model = Qwen2ForCausalLM.from_pretrained(
|
model = Qwen2ForCausalLM.from_pretrained(
|
||||||
"Qwen/Qwen2-450m-beta",
|
"Qwen/Qwen2-0.5B",
|
||||||
device_map="auto",
|
device_map="auto",
|
||||||
load_in_4bit=True,
|
load_in_4bit=True,
|
||||||
attn_implementation="flash_attention_2",
|
attn_implementation="flash_attention_2",
|
||||||
@@ -509,11 +511,7 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
EXPECTED_OUTPUT_TOKEN_IDS = [306, 338]
|
EXPECTED_OUTPUT_TOKEN_IDS = [306, 338]
|
||||||
# An input with 4097 tokens that is above the size of the sliding window
|
# An input with 4097 tokens that is above the size of the sliding window
|
||||||
input_ids = [1] + [306, 338] * 2048
|
input_ids = [1] + [306, 338] * 2048
|
||||||
model = Qwen2ForCausalLM.from_pretrained(
|
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-0.5B", device_map="auto", attn_implementation="sdpa")
|
||||||
"Qwen/Qwen2-450m-beta",
|
|
||||||
device_map="auto",
|
|
||||||
attn_implementation="sdpa",
|
|
||||||
)
|
|
||||||
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
|
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
|
||||||
generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
|
generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
|
||||||
self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())
|
self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())
|
||||||
@@ -530,9 +528,11 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
backend_empty_cache(torch_device)
|
backend_empty_cache(torch_device)
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
|
||||||
EXPECTED_TEXT_COMPLETION = """My favourite condiment is 100% ketchup. I love it on everything. I’m not a big"""
|
EXPECTED_TEXT_COMPLETION = (
|
||||||
|
"""My favourite condiment is 100% natural, organic and vegan. I love to use it in my cooking and I"""
|
||||||
|
)
|
||||||
prompt = "My favourite condiment is "
|
prompt = "My favourite condiment is "
|
||||||
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-450m-beta", use_fast=False)
|
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B", use_fast=False)
|
||||||
|
|
||||||
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
||||||
|
|
||||||
@@ -544,13 +544,13 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
@slow
|
@slow
|
||||||
def test_speculative_generation(self):
|
def test_speculative_generation(self):
|
||||||
EXPECTED_TEXT_COMPLETION = (
|
EXPECTED_TEXT_COMPLETION = (
|
||||||
"My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs"
|
"My favourite condiment is 100% natural honey, and I always like to use it in my recipes. I love"
|
||||||
)
|
)
|
||||||
prompt = "My favourite condiment is "
|
prompt = "My favourite condiment is "
|
||||||
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-beta", use_fast=False)
|
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B", use_fast=False)
|
||||||
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-450m-beta", device_map="auto", torch_dtype=torch.float16)
|
model = Qwen2ForCausalLM.from_pretrained("Qwen/Qwen2-0.5B", device_map="auto", torch_dtype=torch.float16)
|
||||||
assistant_model = Qwen2ForCausalLM.from_pretrained(
|
assistant_model = Qwen2ForCausalLM.from_pretrained(
|
||||||
"Qwen/Qwen2-450m-beta", device_map="auto", torch_dtype=torch.float16
|
"Qwen/Qwen2-0.5B", device_map="auto", torch_dtype=torch.float16
|
||||||
)
|
)
|
||||||
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
|
||||||
|
|
||||||
@@ -576,10 +576,12 @@ class Qwen2IntegrationTest(unittest.TestCase):
|
|||||||
convert_and_export_with_cache,
|
convert_and_export_with_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
qwen_model = "Qwen/Qwen2.5-0.5B"
|
qwen_model = "Qwen/Qwen2-0.5B"
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(qwen_model, pad_token="</s>", padding_side="right")
|
tokenizer = AutoTokenizer.from_pretrained(qwen_model, pad_token="</s>", padding_side="right")
|
||||||
EXPECTED_TEXT_COMPLETION = ["My favourite condiment is 100% sugar. I have a jar of 1000 grams of sugar. I use"]
|
EXPECTED_TEXT_COMPLETION = [
|
||||||
|
"My favourite condiment is 100% natural, organic, gluten free, vegan, and free from preservatives. I"
|
||||||
|
]
|
||||||
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
|
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
|
||||||
"input_ids"
|
"input_ids"
|
||||||
].shape[-1]
|
].shape[-1]
|
||||||
|
|||||||
Reference in New Issue
Block a user