Make Barthez tokenizer tests a bit faster (#10399)

* Make Barthez tokenizer tests a bit faster
* Quality
2021-02-25 11:42:25 -05:00
parent b040e6efc1
commit 26f8b2cb10
4 changed files with 6 additions and 8 deletions
--- a/tests/test_tokenization_barthez.py
+++ b/tests/test_tokenization_barthez.py
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def setUp(self):
        super().setUp()
-        tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
+        tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
        tokenizer.save_pretrained(self.tmpdirname)
        tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
        self.tokenizer = tokenizer
    @require_torch
--- a/tests/test_tokenization_common.py
+++ b/tests/test_tokenization_common.py
@@ -238,7 +238,7 @@ class TokenizerTesterMixin:
        tokenizer = self.get_rust_tokenizer()
        for parameter_name, parameter in signature.parameters.items():
-            if parameter.default != inspect.Parameter.empty:
+            if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
                self.assertIn(parameter_name, tokenizer.init_kwargs)
    def test_rust_and_python_full_tokenizers(self):
--- a/tests/test_tokenization_mbart.py
+++ b/tests/test_tokenization_mbart.py
@@ -12,18 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import tempfile
 import unittest
 from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
 from transformers.file_utils import is_sentencepiece_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
 from .test_tokenization_common import TokenizerTesterMixin
-if is_sentencepiece_available():
+SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
    from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
 if is_torch_available():
--- a/tests/test_tokenization_mbart50.py
+++ b/tests/test_tokenization_mbart50.py
@@ -17,13 +17,11 @@ import tempfile
 import unittest
 from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
 from transformers.file_utils import is_sentencepiece_available
 from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
 from .test_tokenization_common import TokenizerTesterMixin
 if is_sentencepiece_available():
 SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")