Add Image To Text Generation pipeline (#18821)
* Add Image2TextGenerationPipeline to supported pipelines
* Add Flax and Tensorflow support
* Add Flax and Tensorflow small tests
* Add default model for Tensorflow
* Add docstring
* Fix doc style
* Add tiny models for pytorch and flax
* Remove flax from pipeline. Fix tests
* Use ydshieh/vit-gpt2-coco-en as a default for both PyTorch and Tensorflow
* Fix Tensorflow support

Co-authored-by: Olivier Dehaene <olivier@huggingface.co>
This commit is contained in:
171
tests/pipelines/test_pipelines_image2text_generation.py
Normal file
171
tests/pipelines/test_pipelines_image2text_generation.py
Normal file
@@ -0,0 +1,171 @@
|
||||
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
|
||||
from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, TF_MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
|
||||
from transformers.pipelines import pipeline
|
||||
from transformers.testing_utils import is_pipeline_test, require_tf, require_torch, require_vision, slow
|
||||
|
||||
from .test_pipelines_common import ANY, PipelineTestCaseMeta
|
||||
|
||||
|
||||
if not is_vision_available():

    class Image:
        """Minimal stand-in used when the vision dependencies are unavailable.

        It only exists so that module-level references to ``Image.open`` do
        not fail at import/collection time; it performs no image loading.
        """

        @staticmethod
        def open(*args, **kwargs):
            """Accept any arguments and return ``None`` without doing anything."""
            return None

else:
    from PIL import Image
|
||||
|
||||
|
||||
@is_pipeline_test
@require_vision
class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    """Tests for the ``image2text-generation`` pipeline.

    ``get_test_pipeline`` / ``run_pipeline_test`` form the generic hook pair
    (presumably driven by ``PipelineTestCaseMeta`` for the models listed in
    ``model_mapping`` / ``tf_model_mapping`` -- confirm in
    ``test_pipelines_common``). The ``test_*`` methods pin exact captions for
    specific checkpoints on PyTorch and TensorFlow.
    """

    model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
    tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING

    def get_test_pipeline(self, model, tokenizer, feature_extractor):
        """Build the pipeline under test together with sample inputs.

        Returns:
            A ``(pipe, examples)`` tuple where ``examples`` holds the same COCO
            fixture twice: once as an opened image object and once as a path.
        """
        pipe = pipeline("image2text-generation", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
        ]
        return pipe, examples

    def run_pipeline_test(self, pipe, examples):
        """Check the output structure: one ``generated_text`` string per input."""
        outputs = pipe(examples)
        self.assertEqual(
            outputs,
            [
                [{"generated_text": ANY(str)}],
                [{"generated_text": ANY(str)}],
            ],
        )

    @require_tf
    def test_small_model_tf(self):
        """Pin the caption produced by the tiny random checkpoint on TensorFlow."""
        # Request TensorFlow explicitly: when only a checkpoint name is given the
        # factory chooses the framework itself, and may pick PyTorch if both
        # frameworks are installed, so the TF path would go untested.
        pipe = pipeline("image2text-generation", model="hf-internal-testing/tiny-random-vit-gpt2", framework="tf")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        expected = [
            {
                "generated_text": (
                    " intermedi intermedi intermedi intermedi intermedi "
                    "explorer explorer explorer explorer explorer explorer "
                    "explorer medicine medicine medicine medicine medicine "
                    "medicine medicine"
                )
            },
        ]

        # Single input -> flat list of generations.
        outputs = pipe(image)
        self.assertEqual(outputs, expected)

        # List input -> one list of generations per image.
        outputs = pipe([image, image])
        self.assertEqual(outputs, [expected, expected])

    @require_torch
    def test_small_model_pt(self):
        """Pin the caption produced by the tiny random checkpoint on PyTorch."""
        pipe = pipeline("image2text-generation", model="hf-internal-testing/tiny-random-vit-gpt2")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        expected = [
            {
                "generated_text": "growthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthgrowthGOGO"
            },
        ]

        # Single input -> flat list of generations.
        outputs = pipe(image)
        self.assertEqual(outputs, expected)

        # List input -> one list of generations per image.
        outputs = pipe([image, image])
        self.assertEqual(outputs, [expected, expected])

    @slow
    @require_torch
    def test_large_model_pt(self):
        """Pin the caption of the ``ydshieh/vit-gpt2-coco-en`` checkpoint on PyTorch."""
        pipe = pipeline("image2text-generation", model="ydshieh/vit-gpt2-coco-en")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        # The trailing space is part of the expected caption.
        expected = [{"generated_text": "a cat laying on a blanket next to a cat laying on a bed "}]

        outputs = pipe(image)
        self.assertEqual(outputs, expected)

        outputs = pipe([image, image])
        self.assertEqual(outputs, [expected, expected])

    @slow
    @require_tf
    def test_large_model_tf(self):
        """Pin the caption of the ``ydshieh/vit-gpt2-coco-en`` checkpoint on TensorFlow."""
        # Request TensorFlow explicitly so this test cannot silently run the
        # PyTorch model when both frameworks are installed.
        pipe = pipeline("image2text-generation", model="ydshieh/vit-gpt2-coco-en", framework="tf")
        image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
        # The trailing space is part of the expected caption.
        expected = [{"generated_text": "a cat laying on a blanket next to a cat laying on a bed "}]

        outputs = pipe(image)
        self.assertEqual(outputs, expected)

        outputs = pipe([image, image])
        self.assertEqual(outputs, [expected, expected])
|
||||
Reference in New Issue
Block a user