clean_up_tokenization_spaces=False if unset (#31938)

* clean_up_tokenization_spaces=False if unset

* deprecate warning

* updating param for old models

* update models

* make fix-copies

* fix-copies and update bert models

* warning msg

* update prophet and clvp

* updating test since space before is arbitrarily removed

* remove warning for 4.45
This commit is contained in:
Ita Zaporozhets
2024-09-26 13:38:20 -04:00
committed by GitHub
parent 3557f9a14a
commit 6730485b02
16 changed files with 67 additions and 7 deletions

View File

@@ -79,7 +79,7 @@ class ClvpTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# Copied from transformers.tests.models.gpt2.test_tokenization_gpt2.GPT2TokenizationTest.get_input_output_texts
def get_input_output_texts(self, tokenizer):
input_text = "lower newer"
output_text = "lower newer"
output_text = "lower[SPACE]newer"
return input_text, output_text
# Copied from transformers.tests.models.layoutxlm.test_tokenization_layoutxlm.LayoutXLMTokenizationTest.test_add_special_tokens