From 693720e56781790d65bdf4a6954a9cee47ce19c1 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 1 Jun 2022 16:26:37 +0200 Subject: [PATCH] Fix LayoutXLMProcessorTest (#17506) * fix Co-authored-by: ydshieh --- tests/models/layoutxlm/test_processor_layoutxlm.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/models/layoutxlm/test_processor_layoutxlm.py b/tests/models/layoutxlm/test_processor_layoutxlm.py index 69865f6d9f..d0d7eec28a 100644 --- a/tests/models/layoutxlm/test_processor_layoutxlm.py +++ b/tests/models/layoutxlm/test_processor_layoutxlm.py @@ -22,7 +22,6 @@ from typing import List from transformers import PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast from transformers.models.layoutxlm import LayoutXLMTokenizer, LayoutXLMTokenizerFast from transformers.testing_utils import ( - get_tests_dir, require_pytesseract, require_sentencepiece, require_tokenizers, @@ -38,9 +37,6 @@ if is_pytesseract_available(): from transformers import LayoutLMv2FeatureExtractor, LayoutXLMProcessor -SAMPLE_SP = get_tests_dir("fixtures/test_sentencepiece.model") - - @require_pytesseract @require_sentencepiece @require_tokenizers @@ -60,11 +56,14 @@ class LayoutXLMProcessorTest(unittest.TestCase): with open(self.feature_extraction_file, "w", encoding="utf-8") as fp: fp.write(json.dumps(feature_extractor_map) + "\n") + # taken from `test_tokenization_layoutxlm.LayoutXLMTokenizationTest.test_save_pretrained` + self.tokenizer_pretrained_name = "hf-internal-testing/tiny-random-layoutxlm" + def get_tokenizer(self, **kwargs) -> PreTrainedTokenizer: - return self.tokenizer_class.from_pretrained(SAMPLE_SP, **kwargs) + return self.tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs) def get_rust_tokenizer(self, **kwargs) -> PreTrainedTokenizerFast: - return self.rust_tokenizer_class.from_pretrained(SAMPLE_SP, **kwargs) + return self.rust_tokenizer_class.from_pretrained(self.tokenizer_pretrained_name, **kwargs) def get_tokenizers(self, **kwargs) -> List[PreTrainedTokenizerBase]: return [self.get_tokenizer(**kwargs), self.get_rust_tokenizer(**kwargs)]