Remove repeated prepare_images in processor tests (#33163)

* Remove repeated prepare_images
* Address comments - update docstring; explanatory comment
2024-09-09 13:20:27 +01:00
parent 0574fa668b
commit f745e7d3f9
21 changed files with 140 additions and 304 deletions
--- a/tests/models/chinese_clip/test_processor_chinese_clip.py
+++ b/tests/models/chinese_clip/test_processor_chinese_clip.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import BertTokenizer, BertTokenizerFast
@@ -26,15 +25,17 @@ from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import FEATURE_EXTRACTOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import ChineseCLIPImageProcessor, ChineseCLIPProcessor


@require_vision
-class ChineseCLIPProcessorTest(unittest.TestCase):
+class ChineseCLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = ChineseCLIPProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -76,6 +77,11 @@ class ChineseCLIPProcessorTest(unittest.TestCase):
        with open(self.image_processor_file, "w", encoding="utf-8") as fp:
            json.dump(image_processor_map, fp)

+        tokenizer = self.get_tokenizer()
+        image_processor = self.get_image_processor()
+        processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
+        processor.save_pretrained(self.tmpdirname)
+
    def get_tokenizer(self, **kwargs):
        return BertTokenizer.from_pretrained(self.tmpdirname, **kwargs)

@@ -88,17 +94,6 @@ class ChineseCLIPProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()