Make 5 Aya Vision integration tests pass on XPU (#37990)
* 5 Aya Vision integration tests pass on XPU

  Signed-off-by: Yao Matrix <matrix.yao@intel.com>

* fix style

  Signed-off-by: Yao Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao Matrix <matrix.yao@intel.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -25,10 +25,12 @@ from transformers import (
|
|||||||
is_vision_available,
|
is_vision_available,
|
||||||
)
|
)
|
||||||
from transformers.testing_utils import (
|
from transformers.testing_utils import (
|
||||||
|
Expectations,
|
||||||
cleanup,
|
cleanup,
|
||||||
|
require_deterministic_for_xpu,
|
||||||
require_read_token,
|
require_read_token,
|
||||||
require_torch,
|
require_torch,
|
||||||
require_torch_gpu,
|
require_torch_accelerator,
|
||||||
slow,
|
slow,
|
||||||
torch_device,
|
torch_device,
|
||||||
)
|
)
|
||||||
@@ -332,7 +334,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
cleanup(torch_device, gc_collect=True)
|
cleanup(torch_device, gc_collect=True)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
def test_small_model_integration_forward(self):
|
def test_small_model_integration_forward(self):
|
||||||
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
||||||
model = AyaVisionForConditionalGeneration.from_pretrained(
|
model = AyaVisionForConditionalGeneration.from_pretrained(
|
||||||
@@ -366,7 +368,8 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
|
@require_deterministic_for_xpu
|
||||||
def test_small_model_integration_generate_text_only(self):
|
def test_small_model_integration_generate_text_only(self):
|
||||||
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
||||||
model = AyaVisionForConditionalGeneration.from_pretrained(
|
model = AyaVisionForConditionalGeneration.from_pretrained(
|
||||||
@@ -390,11 +393,19 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
generate_ids[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True
|
generate_ids[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True
|
||||||
)
|
)
|
||||||
print("decoded_output", decoded_output)
|
print("decoded_output", decoded_output)
|
||||||
expected_output = "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song."
|
|
||||||
|
expected_outputs = Expectations(
|
||||||
|
{
|
||||||
|
("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.",
|
||||||
|
("cuda", 7): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
expected_output = expected_outputs.get_expectation()
|
||||||
|
|
||||||
self.assertEqual(decoded_output, expected_output)
|
self.assertEqual(decoded_output, expected_output)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
def test_small_model_integration_generate_chat_template(self):
|
def test_small_model_integration_generate_chat_template(self):
|
||||||
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
||||||
model = AyaVisionForConditionalGeneration.from_pretrained(
|
model = AyaVisionForConditionalGeneration.from_pretrained(
|
||||||
@@ -423,7 +434,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
self.assertEqual(decoded_output, expected_output)
|
self.assertEqual(decoded_output, expected_output)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
def test_small_model_integration_batched_generate(self):
|
def test_small_model_integration_batched_generate(self):
|
||||||
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
||||||
model = AyaVisionForConditionalGeneration.from_pretrained(
|
model = AyaVisionForConditionalGeneration.from_pretrained(
|
||||||
@@ -459,7 +470,14 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
# Check first output
|
# Check first output
|
||||||
decoded_output = processor.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
decoded_output = processor.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
||||||
print("decoded_output", decoded_output)
|
print("decoded_output", decoded_output)
|
||||||
expected_output = "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene." # fmt: skip
|
expected_outputs = Expectations(
|
||||||
|
{
|
||||||
|
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest lake.",
|
||||||
|
("cuda", 7): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.",
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
expected_output = expected_outputs.get_expectation()
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
decoded_output,
|
decoded_output,
|
||||||
expected_output,
|
expected_output,
|
||||||
@@ -478,7 +496,8 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@require_torch_gpu
|
@require_torch_accelerator
|
||||||
|
@require_deterministic_for_xpu
|
||||||
def test_small_model_integration_batched_generate_multi_image(self):
|
def test_small_model_integration_batched_generate_multi_image(self):
|
||||||
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
|
||||||
model = AyaVisionForConditionalGeneration.from_pretrained(
|
model = AyaVisionForConditionalGeneration.from_pretrained(
|
||||||
@@ -523,7 +542,14 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
# Check first output
|
# Check first output
|
||||||
decoded_output = processor.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
decoded_output = processor.decode(output[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
||||||
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
|
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
|
||||||
expected_output = "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene." # fmt: skip
|
expected_outputs = Expectations(
|
||||||
|
{
|
||||||
|
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest lake.",
|
||||||
|
("cuda", 7): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.",
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
expected_output = expected_outputs.get_expectation()
|
||||||
|
|
||||||
print("decoded_output", decoded_output)
|
print("decoded_output", decoded_output)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
decoded_output,
|
decoded_output,
|
||||||
@@ -534,7 +560,13 @@ class AyaVisionIntegrationTest(unittest.TestCase):
|
|||||||
# Check second output
|
# Check second output
|
||||||
decoded_output = processor.decode(output[1, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
decoded_output = processor.decode(output[1, inputs["input_ids"].shape[1] :], skip_special_tokens=True)
|
||||||
print("decoded_output", decoded_output)
|
print("decoded_output", decoded_output)
|
||||||
expected_output = "The first image showcases the Statue of Liberty, a colossal neoclassical sculpture on Liberty Island in New York Harbor. Standing at a" # fmt: skip
|
expected_outputs = Expectations(
|
||||||
|
{
|
||||||
|
("xpu", 3): "The first image showcases the Statue of Liberty, a colossal neoclassical sculpture on Liberty Island in New York Harbor. Standing at ",
|
||||||
|
("cuda", 7): "The first image showcases the Statue of Liberty, a colossal neoclassical sculpture on Liberty Island in New York Harbor. Standing at a",
|
||||||
|
}
|
||||||
|
) # fmt: skip
|
||||||
|
expected_output = expected_outputs.get_expectation()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
decoded_output,
|
decoded_output,
|
||||||
expected_output,
|
expected_output,
|
||||||
|
|||||||
Reference in New Issue
Block a user