Black preview (#17217)

* Black preview

* Fixup too!

* Fix check copies

* Use the same version as the CI

* Bump black

This commit is contained in:
Sylvain Gugger
2022-05-12 16:25:55 -04:00
committed by GitHub
parent 9bd67ac7bb
commit afe5d42d8d
578 changed files with 8274 additions and 3296 deletions

View File

@@ -365,7 +365,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase):
self.assertEqual(
tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False),
- "<s> ISO 639-3 uses the code fas</s></s> for the dialects spoken across Iran and アフガニスタン ( Afghanistan ).</s>",
+ "<s> ISO 639-3 uses the code fas</s></s> for the dialects spoken across Iran and アフガニスタン ( Afghanistan"
+ " ).</s>",
)
self.assertEqual(
tokenizer.decode(encoding["input_ids"][1:5], spaces_between_special_tokens=False), "ISO 639-3"
@@ -423,7 +424,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase):
self.assertEqual(
tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False),
- "<s> ISO 639-3 uses the code fas</s></s> for the dialects spoken across Iran and アフガニスタン ( Afghanistan ).</s>",
+ "<s> ISO 639-3 uses the code fas</s></s> for the dialects spoken across Iran and アフガニスタン ( Afghanistan"
+ " ).</s>",
)
self.assertEqual(
tokenizer.decode(encoding["input_ids"][1:5], spaces_between_special_tokens=False), "ISO 639-3"
@@ -506,7 +508,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase):
self.assertEqual(len(encoding["token_type_ids"]), 23)
self.assertEqual(
tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False),
- "<s> Japanese is an<ent>East Asian language<ent>spoken by about 128 million people, primarily in Japan.</s>",
+ "<s> Japanese is an<ent>East Asian language<ent>spoken by about 128 million people, primarily in"
+ " Japan.</s>",
)
self.assertEqual(
tokenizer.decode(encoding["input_ids"][4:9], spaces_between_special_tokens=False),
@@ -559,7 +562,8 @@ class MLukeTokenizerIntegrationTests(unittest.TestCase):
self.assertEqual(
tokenizer.decode(encoding["input_ids"], spaces_between_special_tokens=False),
- "<s><ent>Japanese<ent>is an East Asian language spoken by about 128 million people, primarily in<ent2>Japan<ent2>.</s>",
+ "<s><ent>Japanese<ent>is an East Asian language spoken by about 128 million people, primarily"
+ " in<ent2>Japan<ent2>.</s>",
)
self.assertEqual(
tokenizer.decode(encoding["input_ids"][1:4], spaces_between_special_tokens=False),