[Tests] Speed up tokenizer tests (#14964)

* speed up canine and mluke

* speed up mbart and mbart50 toks

* upload files
This commit is contained in:
Patrick von Platen
2021-12-28 17:02:50 +01:00
committed by GitHub
parent f80775df2b
commit 1bfa347707
6 changed files with 297 additions and 7 deletions

View File

@@ -39,11 +39,12 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@cached_property
def canine_tokenizer(self):
# TODO replace nielsr by google
return CanineTokenizer.from_pretrained("nielsr/canine-s")
return CanineTokenizer.from_pretrained("google/canine-s")
def get_tokenizer(self, **kwargs) -> CanineTokenizer:
return self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
tokenizer = self.tokenizer_class.from_pretrained(self.tmpdirname, **kwargs)
tokenizer._unicode_vocab_size = 1024
return tokenizer
@require_torch
def test_prepare_batch_integration(self):