Add support for post-processing kwargs in image-text-to-text pipeline (#35374)

* fix error and improve pipeline

* add processing_kwargs to apply_chat_template

* change default post_process kwarg to args

* Fix slow tests

* fix copies

This commit is contained in:
Yoni Gozlan
2025-02-18 17:43:36 -05:00
committed by GitHub
parent 9b479a245b
commit 9f51dc2535
8 changed files with 91 additions and 41 deletions

View File

@@ -124,7 +124,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
],
}
]
outputs = pipe([image_ny, image_chicago], text=messages)
outputs = pipe([image_ny, image_chicago], text=messages, return_full_text=False, max_new_tokens=10)
self.assertEqual(
outputs,
[
@@ -139,20 +139,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
],
}
],
"generated_text": [
{
"role": "user",
"content": [
{"type": "text", "text": "Whats the difference between these two images?"},
{"type": "image"},
{"type": "image"},
],
},
{
"role": "assistant",
"content": "The first image shows a statue of the Statue of Liberty in the foreground, while the second image shows",
},
],
"generated_text": "The first image shows a statue of Liberty in the",
}
],
)
@@ -179,7 +166,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
],
},
]
outputs = pipe(text=messages)
outputs = pipe(text=messages, max_new_tokens=10)
self.assertEqual(
outputs,
[
@@ -213,7 +200,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
"content": [
{
"type": "text",
"text": "There is a dog and a person in the image. The dog is sitting on the sand, and the person is sitting on",
"text": "There is a dog and a person in the image. The dog is sitting",
}
],
},
@@ -238,7 +225,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
],
}
]
outputs = pipe(text=messages, return_full_text=False)
outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)
self.assertEqual(
outputs,
[
@@ -255,7 +242,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
],
}
],
"generated_text": "In the image, a woman is sitting on the sandy beach, her legs crossed in a relaxed manner",
"generated_text": "In the image, a woman is sitting on the",
}
],
)
@@ -263,7 +250,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
@slow
@require_torch
def test_model_pt_chat_template_image_url(self):
pipe = pipeline("image-text-to-text", model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
messages = [
{
"role": "user",
@@ -279,7 +266,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
}
]
outputs = pipe(text=messages, return_full_text=False, max_new_tokens=10)[0]["generated_text"]
self.assertEqual(outputs, "The image captures the iconic Statue of Liberty, a")
self.assertEqual(outputs, "A statue of liberty in the foreground of a city")
@slow
@require_torch