Remove repeated prepare_images in processor tests (#33163)

* Remove repeated prepare_images

* Address comments - update docstring; explanatory comment
This commit is contained in:
amyeroberts
2024-09-09 13:20:27 +01:00
committed by GitHub
parent 0574fa668b
commit f745e7d3f9
21 changed files with 140 additions and 304 deletions

View File

@@ -23,6 +23,7 @@ import numpy as np
import pytest
import requests
from transformers.models.auto.processing_auto import processor_class_from_name
from transformers.testing_utils import (
get_tests_dir,
require_sentencepiece,
@@ -32,6 +33,8 @@ from transformers.testing_utils import (
)
from transformers.utils import is_vision_available
from ...test_processing_common import ProcessorTesterMixin
if is_vision_available():
from PIL import Image
@@ -52,7 +55,9 @@ SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
@require_sentencepiece
@require_tokenizers
@require_vision
class Kosmos2ProcessorTest(unittest.TestCase):
class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor_class = Kosmos2Processor
def setUp(self):
self.tmpdirname = tempfile.mkdtemp()
@@ -65,6 +70,20 @@ class Kosmos2ProcessorTest(unittest.TestCase):
processor = Kosmos2Processor(image_processor, fast_tokenizer)
processor.save_pretrained(self.tmpdirname)
# We override this method to take the fast tokenizer or image processor by default
def get_component(self, attribute, **kwargs):
    """Load one component of the processor from ``self.tmpdirname``.

    ``attribute`` must be listed in ``self.processor_class.attributes``.
    The class to load is looked up by name from the processor class's
    ``<attribute>_class`` entry; extra keyword arguments are forwarded to
    that class's ``from_pretrained``.

    Returns the loaded component. A tokenizer with no pad token set is
    given ``"[TEST_PAD]"`` as its pad token before being returned.
    """
    processor_cls = self.processor_class
    assert attribute in processor_cls.attributes

    class_name = getattr(processor_cls, f"{attribute}_class")
    # When several class names are registered as a tuple, the last entry
    # wins -- this override exists to pick the fast tokenizer / image
    # processor by default.
    class_name = class_name[-1] if isinstance(class_name, tuple) else class_name

    loaded = processor_class_from_name(class_name).from_pretrained(self.tmpdirname, **kwargs)  # noqa

    needs_pad_token = attribute == "tokenizer" and not loaded.pad_token
    if needs_pad_token:
        loaded.pad_token = "[TEST_PAD]"
    return loaded
def get_tokenizer(self, **kwargs):
    """Return the tokenizer of the processor reloaded from ``self.tmpdirname``.

    Any keyword arguments are forwarded to ``AutoProcessor.from_pretrained``.
    """
    reloaded_processor = AutoProcessor.from_pretrained(self.tmpdirname, **kwargs)
    return reloaded_processor.tokenizer
@@ -74,17 +93,6 @@ class Kosmos2ProcessorTest(unittest.TestCase):
def tearDown(self):
    """Delete the directory tree referenced by ``self.tmpdirname``."""
    scratch_dir = self.tmpdirname
    shutil.rmtree(scratch_dir)
def prepare_image_inputs(self):
    """Return a one-element list containing a randomly generated PIL image.

    A ``uint8`` array of shape ``(3, 30, 400)`` is drawn with
    ``np.random.randint`` and its first axis is moved to the last position
    before the array is handed to ``Image.fromarray``.
    """
    first_axis_array = np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)
    last_axis_array = np.moveaxis(first_axis_array, 0, -1)
    return [Image.fromarray(last_axis_array)]
def test_image_procesor_load_save_reload(self):
# make sure load from Hub repo. -> save -> reload locally work
image_processor = CLIPImageProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")