Uniformize kwargs for Udop processor and update docs (#33628)

* Add optional kwargs and uniformize Udop

* Clean up Unpack

* nit Udop
This commit is contained in:
Yoni Gozlan
2024-09-23 12:47:32 -04:00
committed by GitHub
parent be9cf070ee
commit 1456120929
3 changed files with 110 additions and 88 deletions

View File

@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import shutil
import tempfile
import unittest
@@ -34,7 +32,7 @@ from transformers.testing_utils import (
require_torch,
slow,
)
from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available, is_torch_available
from transformers.utils import cached_property, is_pytesseract_available, is_torch_available
from ...test_processing_common import ProcessorTesterMixin
@@ -55,20 +53,19 @@ if is_pytesseract_available():
class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
tokenizer_class = UdopTokenizer
rust_tokenizer_class = UdopTokenizerFast
maxDiff = None
processor_class = UdopProcessor
maxDiff = None
def setUp(self):
image_processor_map = {
"do_resize": True,
"size": 224,
"apply_ocr": True,
}
self.tmpdirname = tempfile.mkdtemp()
self.feature_extraction_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
with open(self.feature_extraction_file, "w", encoding="utf-8") as fp:
fp.write(json.dumps(image_processor_map) + "\n")
image_processor = LayoutLMv3ImageProcessor(
do_resize=True,
size=224,
apply_ocr=True,
)
tokenizer = UdopTokenizer.from_pretrained("microsoft/udop-large")
processor = UdopProcessor(image_processor=image_processor, tokenizer=tokenizer)
processor.save_pretrained(self.tmpdirname)
self.tokenizer_pretrained_name = "microsoft/udop-large"
@@ -80,15 +77,15 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def get_tokenizer(self, **kwargs) -> PreTrainedTokenizer:
return self.tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)
def get_image_processor(self, **kwargs):
return LayoutLMv3ImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
def get_rust_tokenizer(self, **kwargs) -> PreTrainedTokenizerFast:
return self.rust_tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs)
def get_tokenizers(self, **kwargs) -> List[PreTrainedTokenizerBase]:
return [self.get_tokenizer(**kwargs), self.get_rust_tokenizer(**kwargs)]
def get_image_processor(self, **kwargs):
return LayoutLMv3ImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
def tearDown(self):
shutil.rmtree(self.tmpdirname)
@@ -153,7 +150,7 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
input_str = "lower newer"
image_input = self.prepare_image_inputs()
inputs = processor(text=input_str, images=image_input)
inputs = processor(images=image_input, text=input_str)
self.assertListEqual(list(inputs.keys()), processor.model_input_names)
@@ -472,7 +469,7 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
question = "What's his name?"
words = ["hello", "world"]
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
input_processor = processor(images[0], question, words, boxes, return_tensors="pt")
input_processor = processor(images[0], question, text_pair=words, boxes=boxes, return_tensors="pt")
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]
@@ -488,7 +485,9 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
questions = ["How old is he?", "what's the time"]
words = [["hello", "world"], ["my", "name", "is", "niels"]]
boxes = [[[1, 2, 3, 4], [5, 6, 7, 8]], [[3, 2, 5, 1], [6, 7, 4, 2], [3, 9, 2, 4], [1, 1, 2, 3]]]
input_processor = processor(images, questions, words, boxes, padding=True, return_tensors="pt")
input_processor = processor(
images, questions, text_pair=words, boxes=boxes, padding=True, return_tensors="pt"
)
# verify keys
expected_keys = ["attention_mask", "bbox", "input_ids", "pixel_values"]