Uniformize kwargs for Udop processor and update docs (#33628)

* Add optional kwargs and uniformize udop
* cleanup Unpack
* nit Udop
2024-09-23 12:47:32 -04:00
parent be9cf070ee
commit 1456120929
3 changed files with 110 additions and 88 deletions
--- a/tests/models/udop/test_processor_udop.py
+++ b/tests/models/udop/test_processor_udop.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import json
-import os
 import shutil
 import tempfile
 import unittest
@@ -34,7 +32,7 @@ from transformers.testing_utils import (
    require_torch,
    slow,
 )
-from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available, is_torch_available
+from transformers.utils import cached_property, is_pytesseract_available, is_torch_available

 from ...test_processing_common import ProcessorTesterMixin

@@ -55,20 +53,19 @@ if is_pytesseract_available():
 class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    tokenizer_class = UdopTokenizer
    rust_tokenizer_class = UdopTokenizerFast
-    maxDiff = None
    processor_class = UdopProcessor
+    maxDiff = None

    def setUp(self):
-        image_processor_map = {
-            "do_resize": True,
-            "size": 224,
-            "apply_ocr": True,
-        }
-
        self.tmpdirname = tempfile.mkdtemp()
-        self.feature_extraction_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
-        with open(self.feature_extraction_file, "w", encoding="utf-8") as fp:
-            fp.write(json.dumps(image_processor_map) + "\n")
+        image_processor = LayoutLMv3ImageProcessor(
+            do_resize=True,
+            size=224,
+            apply_ocr=True,
+        )
+        tokenizer = UdopTokenizer.from_pretrained("microsoft/udop-large")
+        processor = UdopProcessor(image_processor=image_processor, tokenizer=tokenizer)
+        processor.save_pretrained(self.tmpdirname)

        self.tokenizer_pretrained_name = "microsoft/udop-large"

@@ -80,15 +77,15 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def get_tokenizer(self, **kwargs) -> PreTrainedTokenizer:
        return self.tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)

+    def get_image_processor(self, **kwargs):
+        return LayoutLMv3ImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
+
    def get_rust_tokenizer(self, **kwargs) -> PreTrainedTokenizerFast:
        return self.rust_tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)

    def get_tokenizers(self, **kwargs) -> List[PreTrainedTokenizerBase]:
        return [self.get_tokenizer(**kwargs), self.get_rust_tokenizer(**kwargs)]

-    def get_image_processor(self, **kwargs):
-        return LayoutLMv3ImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
-
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

@@ -153,7 +150,7 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        input_str = "lower newer"
        image_input = self.prepare_image_inputs()

-        inputs = processor(text=input_str, images=image_input)
+        inputs = processor(images=image_input, text=input_str)

        self.assertListEqual(list(inputs.keys()), processor.model_input_names)

@@ -472,7 +469,7 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
            question = "What's his name?"
            words = ["hello", "world"]
            boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
-            input_processor = processor(images[0], question, words, boxes, return_tensors="pt")
+            input_processor = processor(images[0], question, text_pair=words, boxes=boxes, return_tensors="pt")

            # verify keys
            expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
@@ -488,7 +485,9 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
            questions = ["How old is he?", "what's the time"]
            words = [["hello", "world"], ["my", "name", "is", "niels"]]
            boxes = [[[1, 2, 3, 4], [5, 6, 7, 8]], [[3, 2, 5, 1], [6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3]]]
-            input_processor = processor(images, questions, words, boxes, padding=True, return_tensors="pt")
+            input_processor = processor(
+                images, questions, text_pair=words, boxes=boxes, padding=True, return_tensors="pt"
+            )

            # verify keys
            expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]