Minor llama4 fixes (#38123)

* fix wrong scaling value/default Cache init

* style

* fix various issues on integration tests

* change expected outputs

* fixup

* fix config access

* protect default scaling
This commit is contained in:
Pablo Montalvo
2025-05-20 15:15:54 +02:00
committed by GitHub
parent 856f034f45
commit 9cde2f5d42
2 changed files with 58 additions and 33 deletions

View File

@@ -37,7 +37,7 @@ if is_torch_available():
@require_torch_large_gpu
@require_read_token
class Llama4IntegrationTest(unittest.TestCase):
model_id = "ll-re/Llama-4-17B-Omni-Instruct"
model_id = "meta-llama/Llama-4-Scout-17B-16E"
# This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
cuda_compute_capability_major_version = None
@@ -48,14 +48,17 @@ class Llama4IntegrationTest(unittest.TestCase):
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
cls.model = Llama4ForConditionalGeneration.from_pretrained(
"ll-re/Llama-4-17B-Omni-Instruct", device_map="auto", torch_dtype=torch.float32
"meta-llama/Llama-4-Scout-17B-16E",
device_map="auto",
torch_dtype=torch.float32,
attn_implementation="eager",
)
def setUp(self):
self.processor = Llama4Processor.from_pretrained("ll-re/Llama-4-17B-Omni-Instruct", padding_side="left")
self.processor = Llama4Processor.from_pretrained("meta-llama/Llama-4-Scout-17B-16E", padding_side="left")
url = "https://huggingface.co/datasets/hf-internal-testing/fixtures-captioning/resolve/main/cow_beach_1.png"
self.messages = [
self.messages_1 = [
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
{
"role": "user",
@@ -66,27 +69,7 @@ class Llama4IntegrationTest(unittest.TestCase):
},
]
def test_model_17b_16e_fp16(self):
EXPECTED_TEXT = [
"The capital of France is Paris, which is located in the north-central part of the country. Paris is known for its iconic landmarks such as the",
"Roses are red, violets are blue, and this poem is about you. Roses are red, violets are blue, and I love",
]
messages = [
{"role": "user", "content": "Who are you?"},
]
inputs = self.processor.apply_chat_template(
messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
).to(torch_device)
output = self.model.generate(**inputs, max_new_tokens=100)
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
print(output_text)
self.assertEqual(output_text, EXPECTED_TEXT)
def test_model_17b_16e_batch(self):
messages_2 = [
self.messages_2 = [
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
{
"role": "user",
@@ -101,20 +84,35 @@ class Llama4IntegrationTest(unittest.TestCase):
},
]
def test_model_17b_16e_fp16(self):
EXPECTED_TEXT = [
'system\n\nYou are a helpful assistant.user\n\nWhat is shown in this image?assistant\n\nThe image shows a cow standing on a beach, with a blue sky and a body of water in the background. The cow is brown with a white'
] # fmt: skip
inputs = self.processor.apply_chat_template(
[self.messages, messages_2],
self.messages_1, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True
).to(device=torch_device, dtype=self.model.dtype)
output = self.model.generate(**inputs, max_new_tokens=30, do_sample=False)
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
print(output_text)
self.assertEqual(output_text, EXPECTED_TEXT)
def test_model_17b_16e_batch(self):
inputs = self.processor.apply_chat_template(
[self.messages_1, self.messages_2],
tokenize=True,
return_dict=True,
return_tensors="pt",
padding=True,
add_generation_prompt=True,
).to(torch_device)
).to(device=torch_device, dtype=torch.float32)
output = self.model.generate(**inputs, max_new_tokens=30, do_sample=False)
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
EXPECTED_TEXTS = [
'user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like',
"user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, these images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Image 1:** Shows a cow"
'system\n\nYou are a helpful assistant.user\n\nWhat is shown in this image?assistant\n\nThe image shows a cow standing on a beach, with a blue sky and a body of water in the background. The cow is brown with a white',
'system\n\nYou are a helpful assistant.user\n\nAre these images identical?assistant\n\nNo, these images are not identical. The first image shows a cow standing on a beach with a blue sky and a white cloud in the background.'
] # fmt: skip
self.assertEqual(output_text, EXPECTED_TEXTS)