Remove deprecated properties in tokenization_nllb.py and tokenization_nllb_fast.py (#29834)
* Fix typo in tokenization_nllb.py: Change `adder_tokens_decoder` into `added_tokens_decoder` and improve the warning's readability.
* Fix typo in tokenization_nllb_fast.py: Change `adder_tokens_decoder` into `added_tokens_decoder` and improve the warning's readability.
* Remove deprecated attributes in tokenization_nllb.py: Remove deprecated attributes: `lang_code_to_id`, `fairseq_tokens_to_ids`, `id_to_lang_code`, and `fairseq_ids_to_tokens`
* Remove deprecated attribute in tokenization_nllb_fast.py: Remove deprecated attribute `lang_code_to_id`
* Remove deprecated properties in tokenization_nllb.py: Remove deprecated properties - fix format
* Remove deprecated properties in tokenization_nllb_fast.py: Remove deprecated properties - fix format
* Update test_tokenization_nllb.py
* update test_tokenization_nllb.py
* Update tokenization_nllb.py
* Update test_tokenization_seamless_m4t.py
* Update test_tokenization_seamless_m4t.py
This commit is contained in:
@@ -367,11 +367,6 @@ class NllbDistilledIntegrationTest(unittest.TestCase):
|
||||
cls.pad_token_id = 1
|
||||
return cls
|
||||
|
||||
def test_language_codes(self):
|
||||
self.assertEqual(self.tokenizer.fairseq_tokens_to_ids["ace_Arab"], 256001)
|
||||
self.assertEqual(self.tokenizer.fairseq_tokens_to_ids["ace_Latn"], 256002)
|
||||
self.assertEqual(self.tokenizer.fairseq_tokens_to_ids["fra_Latn"], 256057)
|
||||
|
||||
def test_enro_tokenizer_batch_encode_plus(self):
|
||||
ids = self.tokenizer.batch_encode_plus(self.src_text).input_ids[0]
|
||||
self.assertListEqual(self.expected_src_tokens, ids)
|
||||
@@ -397,13 +392,6 @@ class NllbDistilledIntegrationTest(unittest.TestCase):
|
||||
def test_mask_token(self):
|
||||
self.assertListEqual(self.tokenizer.convert_tokens_to_ids(["<mask>", "ar_AR"]), [256203, 3])
|
||||
|
||||
def test_special_tokens_unaffacted_by_save_load(self):
|
||||
tmpdirname = tempfile.mkdtemp()
|
||||
original_special_tokens = self.tokenizer.fairseq_tokens_to_ids
|
||||
self.tokenizer.save_pretrained(tmpdirname)
|
||||
new_tok = NllbTokenizer.from_pretrained(tmpdirname)
|
||||
self.assertDictEqual(new_tok.fairseq_tokens_to_ids, original_special_tokens)
|
||||
|
||||
@require_torch
|
||||
def test_enro_tokenizer_prepare_batch(self):
|
||||
batch = self.tokenizer(
|
||||
|
||||
Reference in New Issue
Block a user