Process inputs directly in apply_chat_template in image-text-to-text pipeline (#35616)
* tokenize inputs directly in apply_chat_template
* refactor processing
* revert changes processing llava
* Update docs
* fix issue with str being iterable
* add test chat text only
* change function name
This commit is contained in:
@@ -66,6 +66,78 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
|
||||
],
|
||||
)
|
||||
|
||||
@require_torch
def test_small_model_pt_token_text_only(self):
    """Run the image-text-to-text pipeline on text-only input, with no image passed.

    Two input forms are exercised against the same pipeline instance: a single
    raw string prompt, and a batch of two single-turn chat conversations whose
    content holds only ``text`` entries. Each result is compared against a
    hard-coded expected output.
    """

    def user_turn(question):
        # Build a fresh dict on every call so the expected values never share
        # objects with the messages handed to the pipeline.
        return {"role": "user", "content": [{"type": "text", "text": question}]}

    poem_request = "Write a poem on Hugging Face, the company"
    capital_question = "What is the capital of France?"

    pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")

    # Case 1: a plain string prompt.
    prompt = "What is the capital of France? Assistant:"
    plain_outputs = pipe(text=prompt)
    expected_plain = [
        {
            "input_text": "What is the capital of France? Assistant:",
            "generated_text": "What is the capital of France? Assistant: The capital of France is Paris.",
        }
    ]
    self.assertEqual(plain_outputs, expected_plain)

    # Case 2: a batch of chat conversations, each a single user turn with text content only.
    chat_batch = [
        [user_turn(poem_request)],
        [user_turn(capital_question)],
    ]
    chat_outputs = pipe(text=chat_batch)
    expected_chat = [
        [
            {
                "input_text": [user_turn(poem_request)],
                "generated_text": [
                    user_turn(poem_request),
                    {
                        "role": "assistant",
                        "content": "Hugging Face, a company of minds\nWith tools and services that make our lives easier\nFrom",
                    },
                ],
            }
        ],
        [
            {
                "input_text": [user_turn(capital_question)],
                "generated_text": [
                    user_turn(capital_question),
                    {"role": "assistant", "content": "Paris"},
                ],
            }
        ],
    ]
    self.assertEqual(chat_outputs, expected_chat)
|
||||
|
||||
@require_torch
|
||||
def test_small_model_pt_token(self):
|
||||
pipe = pipeline("image-text-to-text", model="llava-hf/llava-interleave-qwen-0.5b-hf")
|
||||
@@ -124,7 +196,7 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
|
||||
],
|
||||
}
|
||||
]
|
||||
outputs = pipe([image_ny, image_chicago], text=messages, return_full_text=False, max_new_tokens=10)
|
||||
outputs = pipe([image_ny, image_chicago], text=messages, return_full_text=True, max_new_tokens=10)
|
||||
self.assertEqual(
|
||||
outputs,
|
||||
[
|
||||
@@ -134,12 +206,37 @@ class ImageTextToTextPipelineTests(unittest.TestCase):
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What’s the difference between these two images?"},
|
||||
{"type": "image"},
|
||||
{"type": "image"},
|
||||
{
|
||||
"type": "image",
|
||||
"image": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"image": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
"generated_text": "The first image shows a statue of Liberty in the",
|
||||
"generated_text": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What’s the difference between these two images?"},
|
||||
{
|
||||
"type": "image",
|
||||
"image": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg",
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"image": "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "The first image shows a statue of Liberty in the",
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user