Use lru_cache for tokenization tests (#36818)

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2025-03-28 15:09:35 +01:00
committed by GitHub
parent 3af425d4c6
commit 1fcaad6df9
92 changed files with 1301 additions and 884 deletions

View File

@@ -15,11 +15,12 @@
import tempfile
import unittest
from functools import lru_cache
from transformers import RoFormerTokenizer, RoFormerTokenizerFast
from transformers.testing_utils import require_rjieba, require_tokenizers
from ...test_tokenization_common import TokenizerTesterMixin
from ...test_tokenization_common import TokenizerTesterMixin, use_cache_if_possible
@require_rjieba
@@ -31,14 +32,25 @@ class RoFormerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
space_between_special_tokens = True
test_rust_tokenizer = True
def setUp(self):
super().setUp()
@classmethod
def setUpClass(cls):
super().setUpClass()
tokenizer = cls.tokenizer_class.from_pretrained("junnyu/roformer_chinese_base")
tokenizer.save_pretrained(cls.tmpdirname)
def get_tokenizer(self, **kwargs):
return self.tokenizer_class.from_pretrained("junnyu/roformer_chinese_base", **kwargs)
@classmethod
@use_cache_if_possible
@lru_cache(maxsize=64)
def get_tokenizer(cls, pretrained_name=None, **kwargs):
pretrained_name = pretrained_name or cls.tmpdirname
return cls.tokenizer_class.from_pretrained(pretrained_name, **kwargs)
def get_rust_tokenizer(self, **kwargs):
return self.rust_tokenizer_class.from_pretrained("junnyu/roformer_chinese_base", **kwargs)
@classmethod
@use_cache_if_possible
@lru_cache(maxsize=64)
def get_rust_tokenizer(cls, pretrained_name=None, **kwargs):
pretrained_name = pretrained_name or cls.tmpdirname
return cls.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
def get_chinese_input_output_texts(self):
input_text = "永和服装饰品有限公司,今天天气非常好"