Fix setting the `truncation_side` attribute in `__init__` of `PreTrainedTokenizerBase` (#15456)
* change truncation_side in init of `PreTrainedTokenizerBase`
  Co-authored-by: LSinev <LSinev@users.noreply.github.com>
* add test
* Revert "replace assert with exception for `padding_side` arg in `PreTrainedTokenizerBase` `__init__`"
  This reverts commit 7a98b87962d2635c7e4d4f00db3948b694624843.
* fix kwargs
* Revert "fix kwargs"
  This reverts commit 67b0a5270e8cf1dbf70e6b0232e94c0452b6946f.
* Update tests/test_tokenization_common.py
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* delete truncation_side variable
* reorganize test
* format
* complete doc
* Revert "Revert "replace assert with exception for `padding_side` arg in `PreTrainedTokenizerBase` `__init__`""
  This reverts commit d5a10a7e2680539e5d9e98ae5d896c893d224b80.
* fix typo
* fix typos to render documentation
* Revert "Revert "Revert "replace assert with exception for `padding_side` arg in `PreTrainedTokenizerBase` `__init__`"""
  This reverts commit 16cf58811943a08f43409a7c83eaa330686591d0.
* format

Co-authored-by: LSinev <LSinev@users.noreply.github.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
@@ -1415,6 +1415,47 @@ class TokenizerTesterMixin:
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def test_truncation_side_in_kwargs(self):
|
||||
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
|
||||
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
|
||||
if self.test_rust_tokenizer:
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(
|
||||
pretrained_name, truncation_side="left", **kwargs
|
||||
)
|
||||
self.assertEqual(tokenizer_r.truncation_side, "left")
|
||||
|
||||
tokenizer_r = self.rust_tokenizer_class.from_pretrained(
|
||||
pretrained_name, truncation_side="right", **kwargs
|
||||
)
|
||||
self.assertEqual(tokenizer_r.truncation_side, "right")
|
||||
|
||||
self.assertRaises(
|
||||
ValueError,
|
||||
self.rust_tokenizer_class.from_pretrained,
|
||||
pretrained_name,
|
||||
truncation_side="unauthorized",
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if self.test_slow_tokenizer:
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(
|
||||
pretrained_name, truncation_side="left", **kwargs
|
||||
)
|
||||
self.assertEqual(tokenizer_p.truncation_side, "left")
|
||||
|
||||
tokenizer_p = self.tokenizer_class.from_pretrained(
|
||||
pretrained_name, truncation_side="right", **kwargs
|
||||
)
|
||||
self.assertEqual(tokenizer_p.truncation_side, "right")
|
||||
|
||||
self.assertRaises(
|
||||
ValueError,
|
||||
self.tokenizer_class.from_pretrained,
|
||||
pretrained_name,
|
||||
truncation_side="unauthorized",
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def test_right_and_left_padding(self):
|
||||
tokenizers = self.get_tokenizers(do_lower_case=False)
|
||||
for tokenizer in tokenizers:
|
||||
|
||||
Reference in New Issue
Block a user