Use lru_cache for tokenization tests (#36818)

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2025-03-28 15:09:35 +01:00
committed by GitHub
parent 3af425d4c6
commit 1fcaad6df9
92 changed files with 1301 additions and 884 deletions

View File

@@ -13,22 +13,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import lru_cache
from transformers import SqueezeBertTokenizer, SqueezeBertTokenizerFast
from transformers.testing_utils import require_tokenizers, slow
from ..bert.test_tokenization_bert import BertTokenizationTest
from ...test_tokenization_common import use_cache_if_possible
# Avoid import `BertTokenizationTest` directly as it will run as `test_tokenization_squeezebert.py::BertTokenizationTest`
# together with `test_tokenization_bert.py::BertTokenizationTest`.
from ..bert import test_tokenization_bert
@require_tokenizers
class SqueezeBertTokenizationTest(BertTokenizationTest):
class SqueezeBertTokenizationTest(test_tokenization_bert.BertTokenizationTest):
tokenizer_class = SqueezeBertTokenizer
rust_tokenizer_class = SqueezeBertTokenizerFast
test_rust_tokenizer = True
from_pretrained_id = "squeezebert/squeezebert-uncased"
def get_rust_tokenizer(self, **kwargs):
return SqueezeBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
@classmethod
@use_cache_if_possible
@lru_cache(maxsize=64)
def get_rust_tokenizer(cls, pretrained_name=None, **kwargs):
pretrained_name = pretrained_name or cls.tmpdirname
return SqueezeBertTokenizerFast.from_pretrained(pretrained_name, **kwargs)
@slow
def test_sequence_builders(self):