Remove repeated prepare_images in processor tests (#33163)

* Remove repeated prepare_images

* Address comments - update docstring; explanatory comment
This commit is contained in:
amyeroberts
2024-09-09 13:20:27 +01:00
committed by GitHub
parent 0574fa668b
commit f745e7d3f9
21 changed files with 140 additions and 304 deletions

View File

@@ -19,26 +19,27 @@ import tempfile
import unittest
from typing import List
import numpy as np
from transformers import PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast
from transformers.models.layoutlmv2 import LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast
from transformers.models.layoutlmv2 import LayoutLMv2Processor, LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast
from transformers.models.layoutlmv2.tokenization_layoutlmv2 import VOCAB_FILES_NAMES
from transformers.testing_utils import require_pytesseract, require_tokenizers, require_torch, slow
from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available
from ...test_processing_common import ProcessorTesterMixin
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv2ImageProcessor, LayoutLMv2Processor
from transformers import LayoutLMv2ImageProcessor
@require_pytesseract
@require_tokenizers
class LayoutLMv2ProcessorTest(unittest.TestCase):
class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
tokenizer_class = LayoutLMv2Tokenizer
rust_tokenizer_class = LayoutLMv2TokenizerFast
processor_class = LayoutLMv2Processor
def setUp(self):
vocab_tokens = [
@@ -88,17 +89,6 @@ class LayoutLMv2ProcessorTest(unittest.TestCase):
def tearDown(self):
shutil.rmtree(self.tmpdirname)
def prepare_image_inputs(self):
"""This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
or a list of PyTorch tensors if one specifies torchify=True.
"""
image_inputs = [np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)]
image_inputs = [Image.fromarray(np.moveaxis(x, 0, -1)) for x in image_inputs]
return image_inputs
def test_save_load_pretrained_default(self):
image_processor = self.get_image_processor()
tokenizers = self.get_tokenizers()