Update quality tooling for formatting (#21480)

* Result of black 23.1

* Update target to Python 3.7

* Switch flake8 to ruff

* Configure isort

* Configure isort

* Apply isort with line limit

* Put the right black version

* adapt black in check copies

* Fix copies
This commit is contained in:
Sylvain Gugger
2023-02-06 18:10:56 -05:00
committed by GitHub
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions

View File

@@ -34,6 +34,7 @@ from huggingface_hub import HfFolder, delete_repo, set_access_token
from huggingface_hub.file_download import http_get
from parameterized import parameterized
from requests.exceptions import HTTPError
from transformers import (
AlbertTokenizer,
AlbertTokenizerFast,
@@ -131,7 +132,6 @@ def merge_model_tokenizer_mappings(
class TokenizerTesterMixin:
tokenizer_class = None
rust_tokenizer_class = None
test_slow_tokenizer = True
@@ -915,7 +915,6 @@ class TokenizerTesterMixin:
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
new_toks = [
AddedToken("[ABC]", normalized=False),
AddedToken("[DEF]", normalized=False),
@@ -953,7 +952,6 @@ class TokenizerTesterMixin:
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
if (
tokenizer.build_inputs_with_special_tokens.__qualname__.split(".")[0] != "PreTrainedTokenizer"
and "token_type_ids" in tokenizer.model_input_names
@@ -1004,7 +1002,6 @@ class TokenizerTesterMixin:
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
seq_0 = "Test this method."
seq_1 = "With these inputs."
@@ -2140,7 +2137,6 @@ class TokenizerTesterMixin:
tokenizers = self.get_tokenizers(do_lower_case=False) # , add_prefix_space=True)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
if hasattr(tokenizer, "add_prefix_space") and not tokenizer.add_prefix_space:
continue
@@ -2373,7 +2369,6 @@ class TokenizerTesterMixin:
tokenizers = self.get_tokenizers(do_lower_case=False)
for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return
@@ -2956,7 +2951,6 @@ class TokenizerTesterMixin:
tokenizer = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs)
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"):
if is_torch_available():
returned_tensor = "pt"
elif is_tf_available():
@@ -3579,7 +3573,6 @@ class TokenizerTesterMixin:
def test_special_tokens_initialization(self):
for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
added_tokens = [AddedToken("<special>", lstrip=True)]
tokenizer_r = self.rust_tokenizer_class.from_pretrained(