update ruff version (#30932)

* update ruff version

* fix research projects

* Empty

* Fix errors

---------

Co-authored-by: Lysandre <lysandre@huggingface.co>
This commit is contained in:
Arthur
2024-05-22 06:40:15 +02:00
committed by GitHub
parent 60bb571e99
commit 673440d073
1172 changed files with 1555 additions and 1861 deletions

View File

@@ -61,23 +61,23 @@ class FunnelTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
return FunnelTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
def get_input_output_texts(self, tokenizer):
input_text = "UNwant\u00E9d,running"
input_text = "UNwant\u00e9d,running"
output_text = "unwanted, running"
return input_text, output_text
def test_full_tokenizer(self):
tokenizer = self.tokenizer_class(self.vocab_file)
tokens = tokenizer.tokenize("UNwant\u00E9d,running")
tokens = tokenizer.tokenize("UNwant\u00e9d,running")
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
def test_token_type_ids(self):
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
inputs = tokenizer("UNwant\u00E9d,running")
inputs = tokenizer("UNwant\u00e9d,running")
sentence_len = len(inputs["input_ids"]) - 1
self.assertListEqual(inputs["token_type_ids"], [2] + [0] * sentence_len)
inputs = tokenizer("UNwant\u00E9d,running", "UNwant\u00E9d,running")
inputs = tokenizer("UNwant\u00e9d,running", "UNwant\u00e9d,running")
self.assertListEqual(inputs["token_type_ids"], [2] + [0] * sentence_len + [1] * sentence_len)