Use lru_cache for tokenization tests (#36818)

* fix * fix * fix * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-03-28 15:09:35 +01:00
parent 3af425d4c6
commit 1fcaad6df9
92 changed files with 1301 additions and 884 deletions
--- a/tests/models/squeezebert/test_tokenization_squeezebert.py
+++ b/tests/models/squeezebert/test_tokenization_squeezebert.py
@@ -13,22 +13,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from functools import lru_cache

 from transformers import SqueezeBertTokenizer, SqueezeBertTokenizerFast
 from transformers.testing_utils import require_tokenizers, slow

-from ..bert.test_tokenization_bert import BertTokenizationTest
+from ...test_tokenization_common import use_cache_if_possible
+
+# Avoid importing `BertTokenizationTest` directly, as it would then also run as
+# `test_tokenization_squeezebert.py::BertTokenizationTest` alongside `test_tokenization_bert.py::BertTokenizationTest`.
+from ..bert import test_tokenization_bert


@require_tokenizers
-class SqueezeBertTokenizationTest(BertTokenizationTest):
+class SqueezeBertTokenizationTest(test_tokenization_bert.BertTokenizationTest):
    tokenizer_class = SqueezeBertTokenizer
    rust_tokenizer_class = SqueezeBertTokenizerFast
    test_rust_tokenizer = True
    from_pretrained_id = "squeezebert/squeezebert-uncased"

-    def get_rust_tokenizer(self, **kwargs):
-        return SqueezeBertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
+    @classmethod
+    @use_cache_if_possible
+    @lru_cache(maxsize=64)
+    def get_rust_tokenizer(cls, pretrained_name=None, **kwargs):
+        pretrained_name = pretrained_name or cls.tmpdirname
+        return SqueezeBertTokenizerFast.from_pretrained(pretrained_name, **kwargs)

    @slow
    def test_sequence_builders(self):