Fix naming issue with ImageToText pipeline (#18864)
Co-authored-by: Olivier Dehaene <olivier@huggingface.co>
This commit is contained in:
@@ -29,7 +29,7 @@ There are two categories of pipeline abstractions to be aware about:
|
|||||||
- [`FillMaskPipeline`]
|
- [`FillMaskPipeline`]
|
||||||
- [`ImageClassificationPipeline`]
|
- [`ImageClassificationPipeline`]
|
||||||
- [`ImageSegmentationPipeline`]
|
- [`ImageSegmentationPipeline`]
|
||||||
- [`Image2TextGenerationPipeline`]
|
- [`ImageToTextPipeline`]
|
||||||
- [`ObjectDetectionPipeline`]
|
- [`ObjectDetectionPipeline`]
|
||||||
- [`QuestionAnsweringPipeline`]
|
- [`QuestionAnsweringPipeline`]
|
||||||
- [`SummarizationPipeline`]
|
- [`SummarizationPipeline`]
|
||||||
@@ -366,9 +366,9 @@ That should enable you to do all the custom code you want.
|
|||||||
- __call__
|
- __call__
|
||||||
- all
|
- all
|
||||||
|
|
||||||
### Image2TextGenerationPipeline
|
### ImageToTextPipeline
|
||||||
|
|
||||||
[[autodoc]] Image2TextGenerationPipeline
|
[[autodoc]] ImageToTextPipeline
|
||||||
- __call__
|
- __call__
|
||||||
- all
|
- all
|
||||||
|
|
||||||
|
|||||||
@@ -384,9 +384,9 @@ _import_structure = {
|
|||||||
"CsvPipelineDataFormat",
|
"CsvPipelineDataFormat",
|
||||||
"FeatureExtractionPipeline",
|
"FeatureExtractionPipeline",
|
||||||
"FillMaskPipeline",
|
"FillMaskPipeline",
|
||||||
"Image2TextGenerationPipeline",
|
|
||||||
"ImageClassificationPipeline",
|
"ImageClassificationPipeline",
|
||||||
"ImageSegmentationPipeline",
|
"ImageSegmentationPipeline",
|
||||||
|
"ImageToTextPipeline",
|
||||||
"JsonPipelineDataFormat",
|
"JsonPipelineDataFormat",
|
||||||
"NerPipeline",
|
"NerPipeline",
|
||||||
"ObjectDetectionPipeline",
|
"ObjectDetectionPipeline",
|
||||||
@@ -3192,9 +3192,9 @@ if TYPE_CHECKING:
|
|||||||
CsvPipelineDataFormat,
|
CsvPipelineDataFormat,
|
||||||
FeatureExtractionPipeline,
|
FeatureExtractionPipeline,
|
||||||
FillMaskPipeline,
|
FillMaskPipeline,
|
||||||
Image2TextGenerationPipeline,
|
|
||||||
ImageClassificationPipeline,
|
ImageClassificationPipeline,
|
||||||
ImageSegmentationPipeline,
|
ImageSegmentationPipeline,
|
||||||
|
ImageToTextPipeline,
|
||||||
JsonPipelineDataFormat,
|
JsonPipelineDataFormat,
|
||||||
NerPipeline,
|
NerPipeline,
|
||||||
ObjectDetectionPipeline,
|
ObjectDetectionPipeline,
|
||||||
|
|||||||
@@ -53,9 +53,9 @@ from .base import (
|
|||||||
from .conversational import Conversation, ConversationalPipeline
|
from .conversational import Conversation, ConversationalPipeline
|
||||||
from .feature_extraction import FeatureExtractionPipeline
|
from .feature_extraction import FeatureExtractionPipeline
|
||||||
from .fill_mask import FillMaskPipeline
|
from .fill_mask import FillMaskPipeline
|
||||||
from .image2text_generation import Image2TextGenerationPipeline
|
|
||||||
from .image_classification import ImageClassificationPipeline
|
from .image_classification import ImageClassificationPipeline
|
||||||
from .image_segmentation import ImageSegmentationPipeline
|
from .image_segmentation import ImageSegmentationPipeline
|
||||||
|
from .image_to_text import ImageToTextPipeline
|
||||||
from .object_detection import ObjectDetectionPipeline
|
from .object_detection import ObjectDetectionPipeline
|
||||||
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
|
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
|
||||||
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
|
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
|
||||||
@@ -305,8 +305,8 @@ SUPPORTED_TASKS = {
|
|||||||
"default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
|
"default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
|
||||||
"type": "image",
|
"type": "image",
|
||||||
},
|
},
|
||||||
"image2text-generation": {
|
"image-to-text": {
|
||||||
"impl": Image2TextGenerationPipeline,
|
"impl": ImageToTextPipeline,
|
||||||
"tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
|
"tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
|
||||||
"pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
|
"pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
|
||||||
"default": {
|
"default": {
|
||||||
|
|||||||
@@ -26,13 +26,12 @@ logger = logging.get_logger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
@add_end_docstrings(PIPELINE_INIT_ARGS)
|
@add_end_docstrings(PIPELINE_INIT_ARGS)
|
||||||
class Image2TextGenerationPipeline(Pipeline):
|
class ImageToTextPipeline(Pipeline):
|
||||||
"""
|
"""
|
||||||
Image2Text Generation pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given
|
Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
|
||||||
image.
|
|
||||||
|
|
||||||
This image to text generation pipeline can currently be loaded from pipeline() using the following task identifier:
|
This image to text pipeline can currently be loaded from pipeline() using the following task identifier:
|
||||||
"image2text-generation".
|
"image-to-text".
|
||||||
|
|
||||||
See the list of available models on
|
See the list of available models on
|
||||||
[huggingface.co/models](https://huggingface.co/models?pipeline_tag=image-to-text).
|
[huggingface.co/models](https://huggingface.co/models?pipeline_tag=image-to-text).
|
||||||
@@ -33,12 +33,12 @@ else:
|
|||||||
|
|
||||||
@is_pipeline_test
|
@is_pipeline_test
|
||||||
@require_vision
|
@require_vision
|
||||||
class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||||
model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
|
model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
|
||||||
tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
|
tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
|
||||||
|
|
||||||
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
def get_test_pipeline(self, model, tokenizer, feature_extractor):
|
||||||
pipe = pipeline("image2text-generation", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
|
pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
|
||||||
examples = [
|
examples = [
|
||||||
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
|
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
|
||||||
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
"./tests/fixtures/tests_samples/COCO/000000039769.png",
|
||||||
@@ -57,7 +57,7 @@ class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTes
|
|||||||
|
|
||||||
@require_tf
|
@require_tf
|
||||||
def test_small_model_tf(self):
|
def test_small_model_tf(self):
|
||||||
pipe = pipeline("image2text-generation", model="hf-internal-testing/tiny-random-vit-gpt2")
|
pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2")
|
||||||
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
||||||
|
|
||||||
outputs = pipe(image)
|
outputs = pipe(image)
|
||||||
@@ -104,7 +104,7 @@ class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTes
|
|||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_small_model_pt(self):
|
def test_small_model_pt(self):
|
||||||
pipe = pipeline("image2text-generation", model="hf-internal-testing/tiny-random-vit-gpt2")
|
pipe = pipeline("image-to-text", model="hf-internal-testing/tiny-random-vit-gpt2")
|
||||||
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
||||||
|
|
||||||
outputs = pipe(image)
|
outputs = pipe(image)
|
||||||
@@ -137,7 +137,7 @@ class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTes
|
|||||||
@slow
|
@slow
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_large_model_pt(self):
|
def test_large_model_pt(self):
|
||||||
pipe = pipeline("image2text-generation", model="ydshieh/vit-gpt2-coco-en")
|
pipe = pipeline("image-to-text", model="ydshieh/vit-gpt2-coco-en")
|
||||||
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
||||||
|
|
||||||
outputs = pipe(image)
|
outputs = pipe(image)
|
||||||
@@ -155,7 +155,7 @@ class Image2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTes
|
|||||||
@slow
|
@slow
|
||||||
@require_tf
|
@require_tf
|
||||||
def test_large_model_tf(self):
|
def test_large_model_tf(self):
|
||||||
pipe = pipeline("image2text-generation", model="ydshieh/vit-gpt2-coco-en")
|
pipe = pipeline("image-to-text", model="ydshieh/vit-gpt2-coco-en")
|
||||||
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
image = "./tests/fixtures/tests_samples/COCO/000000039769.png"
|
||||||
|
|
||||||
outputs = pipe(image)
|
outputs = pipe(image)
|
||||||
Reference in New Issue
Block a user