Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black
This commit is contained in:
Sylvain Gugger
2022-05-12 16:25:55 -04:00
committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions

View File

@@ -72,7 +72,10 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_mask_tokens_rust_pegasus(self):
rust_tokenizer = self.rust_tokenizer_class.from_pretrained(self.tmpdirname)
py_tokenizer = self.tokenizer_class.from_pretrained(self.tmpdirname)
raw_input_str = "Let's see which <unk> is the better <unk_token_11> one <mask_1> It seems like this <mask_2> was important </s> <pad> <pad> <pad>"
raw_input_str = (
"Let's see which <unk> is the better <unk_token_11> one <mask_1> It seems like this <mask_2> was important"
" </s> <pad> <pad> <pad>"
)
rust_ids = rust_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
py_ids = py_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
self.assertListEqual(py_ids, rust_ids)
@@ -158,7 +161,10 @@ class BigBirdPegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_mask_tokens_rust_pegasus(self):
rust_tokenizer = self.rust_tokenizer_class.from_pretrained(self.tmpdirname)
py_tokenizer = self.tokenizer_class.from_pretrained(self.tmpdirname)
raw_input_str = "Let's see which <unk> is the better <unk_token> one [MASK] It seems like this [MASK] was important </s> <pad> <pad> <pad>"
raw_input_str = (
"Let's see which <unk> is the better <unk_token> one [MASK] It seems like this [MASK] was important </s>"
" <pad> <pad> <pad>"
)
rust_ids = rust_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
py_ids = py_tokenizer([raw_input_str], return_tensors=None, add_special_tokens=False).input_ids[0]
self.assertListEqual(py_ids, rust_ids)
@@ -198,7 +204,10 @@ class BigBirdPegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
tokenizer.tokenize(test_str)
"""
test_str = "This is an example string that is used to test the original TF implementation against the HF implementation"
test_str = (
"This is an example string that is used to test the original TF implementation against the HF"
" implementation"
)
token_ids = self._large_tokenizer(test_str).input_ids