Remove repeated prepare_images in processor tests (#33163)

* Remove repeated prepare_images * Address comments - update docstring; explanatory comment
2024-09-09 13:20:27 +01:00
parent 0574fa668b
commit f745e7d3f9
21 changed files with 140 additions and 304 deletions
--- a/tests/models/align/test_processor_align.py
+++ b/tests/models/align/test_processor_align.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import BertTokenizer, BertTokenizerFast
@@ -30,8 +29,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_vision_available():
-    from PIL import Image
-
    from transformers import AlignProcessor, EfficientNetImageProcessor


@@ -86,15 +83,6 @@ class AlignProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/blip/test_processor_blip.py
+++ b/tests/models/blip/test_processor_blip.py
@@ -15,21 +15,22 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers.testing_utils import require_vision
 from transformers.utils import is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import AutoProcessor, BertTokenizer, BlipImageProcessor, BlipProcessor, PreTrainedTokenizerFast


@require_vision
-class BlipProcessorTest(unittest.TestCase):
+class BlipProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = BlipProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -49,17 +50,6 @@ class BlipProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_additional_features(self):
        processor = BlipProcessor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
        processor.save_pretrained(self.tmpdirname)
--- a/tests/models/blip_2/test_processor_blip_2.py
+++ b/tests/models/blip_2/test_processor_blip_2.py
@@ -15,21 +15,22 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers.testing_utils import require_vision
 from transformers.utils import is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import AutoProcessor, Blip2Processor, BlipImageProcessor, GPT2Tokenizer, PreTrainedTokenizerFast


@require_vision
-class Blip2ProcessorTest(unittest.TestCase):
+class Blip2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = Blip2Processor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -49,17 +50,6 @@ class Blip2ProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_additional_features(self):
        processor = Blip2Processor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
        processor.save_pretrained(self.tmpdirname)
--- a/tests/models/chinese_clip/test_processor_chinese_clip.py
+++ b/tests/models/chinese_clip/test_processor_chinese_clip.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import BertTokenizer, BertTokenizerFast
@@ -26,15 +25,17 @@ from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import FEATURE_EXTRACTOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import ChineseCLIPImageProcessor, ChineseCLIPProcessor


@require_vision
-class ChineseCLIPProcessorTest(unittest.TestCase):
+class ChineseCLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = ChineseCLIPProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -76,6 +77,11 @@ class ChineseCLIPProcessorTest(unittest.TestCase):
        with open(self.image_processor_file, "w", encoding="utf-8") as fp:
            json.dump(image_processor_map, fp)

+        tokenizer = self.get_tokenizer()
+        image_processor = self.get_image_processor()
+        processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
+        processor.save_pretrained(self.tmpdirname)
+
    def get_tokenizer(self, **kwargs):
        return BertTokenizer.from_pretrained(self.tmpdirname, **kwargs)

@@ -88,17 +94,6 @@ class ChineseCLIPProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/clip/test_processor_clip.py
+++ b/tests/models/clip/test_processor_clip.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import CLIPTokenizer, CLIPTokenizerFast
@@ -30,8 +29,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_vision_available():
-    from PIL import Image
-
    from transformers import CLIPImageProcessor, CLIPProcessor


@@ -79,17 +76,6 @@ class CLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/clipseg/test_processor_clipseg.py
+++ b/tests/models/clipseg/test_processor_clipseg.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import CLIPTokenizer, CLIPTokenizerFast
@@ -26,15 +25,17 @@ from transformers.models.clip.tokenization_clip import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import CLIPSegProcessor, ViTImageProcessor


@require_vision
-class CLIPSegProcessorTest(unittest.TestCase):
+class CLIPSegProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = CLIPSegProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -75,16 +76,6 @@ class CLIPSegProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True."""
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/flava/test_processor_flava.py
+++ b/tests/models/flava/test_processor_flava.py
@@ -19,7 +19,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import BertTokenizer, BertTokenizerFast
@@ -27,10 +26,10 @@ from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import FlavaImageProcessor, FlavaProcessor
    from transformers.models.flava.image_processing_flava import (
        FLAVA_CODEBOOK_MEAN,
@@ -41,7 +40,9 @@ if is_vision_available():


@require_vision
-class FlavaProcessorTest(unittest.TestCase):
+class FlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = FlavaProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -91,17 +92,6 @@ class FlavaProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/git/test_processor_git.py
+++ b/tests/models/git/test_processor_git.py
@@ -15,21 +15,22 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers.testing_utils import require_vision
 from transformers.utils import is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import AutoProcessor, BertTokenizer, CLIPImageProcessor, GitProcessor, PreTrainedTokenizerFast


@require_vision
-class GitProcessorTest(unittest.TestCase):
+class GitProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = GitProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -51,17 +52,6 @@ class GitProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_additional_features(self):
        processor = GitProcessor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
        processor.save_pretrained(self.tmpdirname)
--- a/tests/models/grounding_dino/test_processor_grounding_dino.py
+++ b/tests/models/grounding_dino/test_processor_grounding_dino.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import BertTokenizer, BertTokenizerFast, GroundingDinoProcessor
@@ -35,8 +34,6 @@ if is_torch_available():
    from transformers.models.grounding_dino.modeling_grounding_dino import GroundingDinoObjectDetectionOutput

 if is_vision_available():
-    from PIL import Image
-
    from transformers import GroundingDinoImageProcessor


@@ -96,18 +93,6 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.prepare_image_inputs
-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def get_fake_grounding_dino_output(self):
        torch.manual_seed(42)
        return GroundingDinoObjectDetectionOutput(
--- a/tests/models/instructblip/test_processor_instructblip.py
+++ b/tests/models/instructblip/test_processor_instructblip.py
@@ -15,7 +15,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers.testing_utils import require_torch, require_vision
@@ -25,8 +24,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_vision_available():
-    from PIL import Image
-
    from transformers import (
        AutoProcessor,
        BertTokenizerFast,
@@ -64,17 +61,6 @@ class InstructBlipProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_additional_features(self):
        processor = InstructBlipProcessor(
            tokenizer=self.get_tokenizer(),
--- a/tests/models/kosmos2/test_processor_kosmos2.py
+++ b/tests/models/kosmos2/test_processor_kosmos2.py
@@ -23,6 +23,7 @@ import numpy as np
 import pytest
 import requests

+from transformers.models.auto.processing_auto import processor_class_from_name
 from transformers.testing_utils import (
    get_tests_dir,
    require_sentencepiece,
@@ -32,6 +33,8 @@ from transformers.testing_utils import (
 )
 from transformers.utils import is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
    from PIL import Image
@@ -52,7 +55,9 @@ SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
@require_sentencepiece
@require_tokenizers
@require_vision
-class Kosmos2ProcessorTest(unittest.TestCase):
+class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = Kosmos2Processor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -65,6 +70,20 @@ class Kosmos2ProcessorTest(unittest.TestCase):
        processor = Kosmos2Processor(image_processor, fast_tokenizer)
        processor.save_pretrained(self.tmpdirname)

+    # We override this method to take the fast tokenizer or image processor by default
+    def get_component(self, attribute, **kwargs):
+        assert attribute in self.processor_class.attributes
+        component_class_name = getattr(self.processor_class, f"{attribute}_class")
+        if isinstance(component_class_name, tuple):
+            component_class_name = component_class_name[-1]
+
+        component_class = processor_class_from_name(component_class_name)
+        component = component_class.from_pretrained(self.tmpdirname, **kwargs)  # noqa
+        if attribute == "tokenizer" and not component.pad_token:
+            component.pad_token = "[TEST_PAD]"
+
+        return component
+
    def get_tokenizer(self, **kwargs):
        return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).tokenizer

@@ -74,17 +93,6 @@ class Kosmos2ProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_image_procesor_load_save_reload(self):
        # make sure load from Hub repo. -> save -> reload locally work
        image_processor = CLIPImageProcessor.from_pretrained("microsoft/kosmos-2-patch14-224")
--- a/tests/models/layoutlmv2/test_processor_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_processor_layoutlmv2.py
@@ -19,26 +19,27 @@ import tempfile
 import unittest
 from typing import List

-import numpy as np
-
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast
-from transformers.models.layoutlmv2 import LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast
+from transformers.models.layoutlmv2 import LayoutLMv2Processor, LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast
 from transformers.models.layoutlmv2.tokenization_layoutlmv2 import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_pytesseract, require_tokenizers, require_torch, slow
 from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_pytesseract_available():
    from PIL import Image

-    from transformers import LayoutLMv2ImageProcessor, LayoutLMv2Processor
+    from transformers import LayoutLMv2ImageProcessor


@require_pytesseract
@require_tokenizers
-class LayoutLMv2ProcessorTest(unittest.TestCase):
+class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    tokenizer_class = LayoutLMv2Tokenizer
    rust_tokenizer_class = LayoutLMv2TokenizerFast
+    processor_class = LayoutLMv2Processor

    def setUp(self):
        vocab_tokens = [
@@ -88,17 +89,6 @@ class LayoutLMv2ProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        tokenizers = self.get_tokenizers()
--- a/tests/models/layoutlmv3/test_processor_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_processor_layoutlmv3.py
@@ -19,26 +19,27 @@ import tempfile
 import unittest
 from typing import List

-import numpy as np
-
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast
-from transformers.models.layoutlmv3 import LayoutLMv3Tokenizer, LayoutLMv3TokenizerFast
+from transformers.models.layoutlmv3 import LayoutLMv3Processor, LayoutLMv3Tokenizer, LayoutLMv3TokenizerFast
 from transformers.models.layoutlmv3.tokenization_layoutlmv3 import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_pytesseract, require_tokenizers, require_torch, slow
 from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_pytesseract_available():
    from PIL import Image

-    from transformers import LayoutLMv3ImageProcessor, LayoutLMv3Processor
+    from transformers import LayoutLMv3ImageProcessor


@require_pytesseract
@require_tokenizers
-class LayoutLMv3ProcessorTest(unittest.TestCase):
+class LayoutLMv3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    tokenizer_class = LayoutLMv3Tokenizer
    rust_tokenizer_class = LayoutLMv3TokenizerFast
+    processor_class = LayoutLMv3Processor

    def setUp(self):
        # Adapted from Sennrich et al. 2015 and https://github.com/rsennrich/subword-nmt
@@ -101,17 +102,6 @@ class LayoutLMv3ProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        tokenizers = self.get_tokenizers()
--- a/tests/models/layoutxlm/test_processor_layoutxlm.py
+++ b/tests/models/layoutxlm/test_processor_layoutxlm.py
@@ -19,10 +19,8 @@ import tempfile
 import unittest
 from typing import List

-import numpy as np
-
 from transformers import PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast
-from transformers.models.layoutxlm import LayoutXLMTokenizer, LayoutXLMTokenizerFast
+from transformers.models.layoutxlm import LayoutXLMProcessor, LayoutXLMTokenizer, LayoutXLMTokenizerFast
 from transformers.testing_utils import (
    require_pytesseract,
    require_sentencepiece,
@@ -32,19 +30,22 @@ from transformers.testing_utils import (
 )
 from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_pytesseract_available():
    from PIL import Image

-    from transformers import LayoutLMv2ImageProcessor, LayoutXLMProcessor
+    from transformers import LayoutLMv2ImageProcessor


@require_pytesseract
@require_sentencepiece
@require_tokenizers
-class LayoutXLMProcessorTest(unittest.TestCase):
+class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    tokenizer_class = LayoutXLMTokenizer
    rust_tokenizer_class = LayoutXLMTokenizerFast
+    processor_class = LayoutXLMProcessor

    def setUp(self):
        image_processor_map = {
@@ -61,6 +62,11 @@ class LayoutXLMProcessorTest(unittest.TestCase):
        # taken from `test_tokenization_layoutxlm.LayoutXLMTokenizationTest.test_save_pretrained`
        self.tokenizer_pretrained_name = "hf-internal-testing/tiny-random-layoutxlm"

+        tokenizer = self.get_tokenizer()
+        image_processor = self.get_image_processor()
+        processor = LayoutXLMProcessor(tokenizer=tokenizer, image_processor=image_processor)
+        processor.save_pretrained(self.tmpdirname)
+
    def get_tokenizer(self, **kwargs) -> PreTrainedTokenizer:
        return self.tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)

@@ -76,17 +82,6 @@ class LayoutXLMProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        tokenizers = self.get_tokenizers()
--- a/tests/models/mgp_str/test_processor_mgp_str.py
+++ b/tests/models/mgp_str/test_processor_mgp_str.py
@@ -70,6 +70,17 @@ class MgpstrProcessorTest(unittest.TestCase):
        with open(self.image_processor_file, "w", encoding="utf-8") as fp:
            json.dump(image_processor_map, fp)

+    # We copy here rather than use the ProcessorTesterMixin as this processor has a `char_tokenizer` instead of a
+    # tokenizer attribute, which means all the tests would need to be overridden.
+    @require_vision
+    def prepare_image_inputs(self):
+        """This function prepares a list containing a single random PIL image, built from a
+        (3, 30, 400) uint8 array whose channel axis is moved to the last position.
+        """
+        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
+        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
+        return image_inputs
+
    def get_tokenizer(self, **kwargs):
        return MgpstrTokenizer.from_pretrained(self.tmpdirname, **kwargs)

@@ -79,15 +90,6 @@ class MgpstrProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images."""
-
-        image_input = np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)
-
-        image_input = Image.fromarray(np.moveaxis(image_input, 0, -1))
-
-        return image_input
-
    def test_save_load_pretrained_default(self):
        tokenizer = self.get_tokenizer()
        image_processor = self.get_image_processor()
--- a/tests/models/owlvit/test_processor_owlvit.py
+++ b/tests/models/owlvit/test_processor_owlvit.py
@@ -18,7 +18,6 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers import CLIPTokenizer, CLIPTokenizerFast
@@ -26,15 +25,17 @@ from transformers.models.clip.tokenization_clip import VOCAB_FILES_NAMES
 from transformers.testing_utils import require_vision
 from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import OwlViTImageProcessor, OwlViTProcessor


@require_vision
-class OwlViTProcessorTest(unittest.TestCase):
+class OwlViTProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = OwlViTProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -75,17 +76,6 @@ class OwlViTProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
--- a/tests/models/pix2struct/test_processor_pix2struct.py
+++ b/tests/models/pix2struct/test_processor_pix2struct.py
@@ -15,16 +15,15 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
 import pytest

 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import (
        AutoProcessor,
        Pix2StructImageProcessor,
@@ -36,7 +35,9 @@ if is_vision_available():

@require_vision
@require_torch
-class Pix2StructProcessorTest(unittest.TestCase):
+class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = Pix2StructProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -56,17 +57,6 @@ class Pix2StructProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """
-        This function prepares a list of random PIL images of the same fixed size.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_additional_features(self):
        processor = Pix2StructProcessor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
        processor.save_pretrained(self.tmpdirname)
--- a/tests/models/sam/test_processor_sam.py
+++ b/tests/models/sam/test_processor_sam.py
@@ -26,6 +26,8 @@ from transformers.testing_utils import (
 )
 from transformers.utils import is_tf_available, is_torch_available, is_vision_available

+from ...test_processing_common import prepare_image_inputs
+

 if is_vision_available():
    from PIL import Image
@@ -54,13 +56,10 @@ class SamProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

+    # Processor tester class can't use ProcessorTesterMixin atm because the processor is atypical e.g. only contains an image processor
    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-        return image_inputs
+        """This function prepares a list of PIL images."""
+        return prepare_image_inputs()

    def prepare_mask_inputs(self):
        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
@@ -166,16 +165,10 @@ class TFSamProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

+    # This test class can't use ProcessorTesterMixin because the processor is atypical (e.g. it only contains an image processor) and the mixin assumes torch
    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
+        """This function prepares a list of PIL images."""
+        return prepare_image_inputs()

    def test_save_load_pretrained_additional_features(self):
        processor = SamProcessor(image_processor=self.get_image_processor())
@@ -255,16 +248,10 @@ class SamProcessorEquivalenceTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

+    # This test class can't use ProcessorTesterMixin at the moment because the processor is atypical, e.g. it only contains an image processor
    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
+        """This function prepares a list of PIL images."""
+        return prepare_image_inputs()

    @is_pt_tf_cross_test
    def test_post_process_masks_equivalence(self):
--- a/tests/models/udop/test_processor_udop.py
+++ b/tests/models/udop/test_processor_udop.py
@@ -19,12 +19,11 @@ import tempfile
 import unittest
 from typing import List

-import numpy as np
-
 from transformers import (
    PreTrainedTokenizer,
    PreTrainedTokenizerBase,
    PreTrainedTokenizerFast,
+    UdopProcessor,
    UdopTokenizer,
    UdopTokenizerFast,
 )
@@ -37,6 +36,8 @@ from transformers.testing_utils import (
 )
 from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available, is_torch_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_torch_available():
    import torch
@@ -45,16 +46,17 @@ if is_torch_available():
 if is_pytesseract_available():
    from PIL import Image

-    from transformers import LayoutLMv3ImageProcessor, UdopProcessor
+    from transformers import LayoutLMv3ImageProcessor


@require_pytesseract
@require_sentencepiece
@require_tokenizers
-class UdopProcessorTest(unittest.TestCase):
+class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    tokenizer_class = UdopTokenizer
    rust_tokenizer_class = UdopTokenizerFast
    maxDiff = None
+    processor_class = UdopProcessor

    def setUp(self):
        image_processor_map = {
@@ -70,6 +72,11 @@ class UdopProcessorTest(unittest.TestCase):

        self.tokenizer_pretrained_name = "microsoft/udop-large"

+        image_processor = self.get_image_processor()
+        tokenizer = self.get_tokenizers()[0]
+        processor = UdopProcessor(image_processor=image_processor, tokenizer=tokenizer)
+        processor.save_pretrained(self.tmpdirname)
+
    def get_tokenizer(self, **kwargs) -> PreTrainedTokenizer:
        return self.tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)

@@ -85,17 +92,6 @@ class UdopProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        tokenizers = self.get_tokenizers()
--- a/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py
+++ b/tests/models/vision_text_dual_encoder/test_processor_vision_text_dual_encoder.py
@@ -18,23 +18,23 @@ import shutil
 import tempfile
 import unittest

-import numpy as np
-
 from transformers import BertTokenizerFast
 from transformers.models.bert.tokenization_bert import VOCAB_FILES_NAMES, BertTokenizer
 from transformers.testing_utils import require_tokenizers, require_vision
 from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available

+from ...test_processing_common import ProcessorTesterMixin
+

 if is_vision_available():
-    from PIL import Image
-
    from transformers import VisionTextDualEncoderProcessor, ViTImageProcessor


@require_tokenizers
@require_vision
-class VisionTextDualEncoderProcessorTest(unittest.TestCase):
+class VisionTextDualEncoderProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    processor_class = VisionTextDualEncoderProcessor
+
    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

@@ -54,6 +54,11 @@ class VisionTextDualEncoderProcessorTest(unittest.TestCase):
        with open(self.image_processor_file, "w", encoding="utf-8") as fp:
            json.dump(image_processor_map, fp)

+        tokenizer = self.get_tokenizer()
+        image_processor = self.get_image_processor()
+        processor = VisionTextDualEncoderProcessor(tokenizer=tokenizer, image_processor=image_processor)
+        processor.save_pretrained(self.tmpdirname)
+
    def get_tokenizer(self, **kwargs):
        return BertTokenizer.from_pretrained(self.tmpdirname, **kwargs)

@@ -63,17 +68,6 @@ class VisionTextDualEncoderProcessorTest(unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-
-        return image_inputs
-
    def test_save_load_pretrained_default(self):
        tokenizer = self.get_tokenizer()
        image_processor = self.get_image_processor()
--- a/tests/test_processing_common.py
+++ b/tests/test_processing_common.py
@@ -44,6 +44,13 @@ if is_vision_available():
    from transformers import CLIPImageProcessor


+def prepare_image_inputs():
+    """This function prepares a list containing a single randomly generated PIL image."""
+    image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
+    image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
+    return image_inputs
+
+
@require_torch
@require_vision
@require_torch
@@ -81,12 +88,8 @@ class ProcessorTesterMixin:

    @require_vision
    def prepare_image_inputs(self):
-        """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-        or a list of PyTorch tensors if one specifies torchify=True.
-        """
-        image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
-        image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
-        return image_inputs
+        """This function prepares a list of PIL images for testing."""
+        return prepare_image_inputs()

    def test_processor_to_json_string(self):
        processor = self.get_processor()