Make Barthez tokenizer tests a bit faster (#10399)
* Make Barthez tokenizer tests a bit faster * Quality
This commit is contained in:
@@ -33,8 +33,9 @@ class BarthezTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
|||||||
def setUp(self):
|
def setUp(self):
|
||||||
super().setUp()
|
super().setUp()
|
||||||
|
|
||||||
tokenizer = BarthezTokenizer.from_pretrained("moussaKam/mbarthez")
|
tokenizer = BarthezTokenizerFast.from_pretrained("moussaKam/mbarthez")
|
||||||
tokenizer.save_pretrained(self.tmpdirname)
|
tokenizer.save_pretrained(self.tmpdirname)
|
||||||
|
tokenizer.save_pretrained(self.tmpdirname, legacy_format=False)
|
||||||
self.tokenizer = tokenizer
|
self.tokenizer = tokenizer
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
|
|||||||
@@ -238,7 +238,7 @@ class TokenizerTesterMixin:
|
|||||||
tokenizer = self.get_rust_tokenizer()
|
tokenizer = self.get_rust_tokenizer()
|
||||||
|
|
||||||
for parameter_name, parameter in signature.parameters.items():
|
for parameter_name, parameter in signature.parameters.items():
|
||||||
if parameter.default != inspect.Parameter.empty:
|
if parameter.default != inspect.Parameter.empty and parameter_name != "tokenizer_file":
|
||||||
self.assertIn(parameter_name, tokenizer.init_kwargs)
|
self.assertIn(parameter_name, tokenizer.init_kwargs)
|
||||||
|
|
||||||
def test_rust_and_python_full_tokenizers(self):
|
def test_rust_and_python_full_tokenizers(self):
|
||||||
|
|||||||
@@ -12,18 +12,17 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
|
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBartTokenizer, MBartTokenizerFast, is_torch_available
|
||||||
from transformers.file_utils import is_sentencepiece_available
|
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
||||||
|
|
||||||
from .test_tokenization_common import TokenizerTesterMixin
|
from .test_tokenization_common import TokenizerTesterMixin
|
||||||
|
|
||||||
|
|
||||||
if is_sentencepiece_available():
|
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||||
from .test_tokenization_xlm_roberta import SAMPLE_VOCAB
|
|
||||||
|
|
||||||
|
|
||||||
if is_torch_available():
|
if is_torch_available():
|
||||||
|
|||||||
@@ -17,13 +17,11 @@ import tempfile
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
|
from transformers import SPIECE_UNDERLINE, BatchEncoding, MBart50Tokenizer, MBart50TokenizerFast, is_torch_available
|
||||||
from transformers.file_utils import is_sentencepiece_available
|
|
||||||
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch
|
||||||
|
|
||||||
from .test_tokenization_common import TokenizerTesterMixin
|
from .test_tokenization_common import TokenizerTesterMixin
|
||||||
|
|
||||||
|
|
||||||
if is_sentencepiece_available():
|
|
||||||
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user