Update quality tooling for formatting (#21480)

* Result of black 23.1
* Update target to Python 3.7
* Switch flake8 to ruff
* Configure isort
* Configure isort
* Apply isort with line limit
* Put the right black version
* Adapt black in check copies
* Fix copies
2023-02-06 18:10:56 -05:00
parent b7bb2b59f7
commit 6f79d26442
1211 changed files with 1532 additions and 2687 deletions
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1824,7 +1824,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        cache_dir=None,
        local_files_only=False,
        _commit_hash=None,
-        **kwargs
+        **kwargs,
    ):
        # We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
        # file or if `from_slow` is set to True.
@@ -1932,7 +1932,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):

            model_max_length = cls.max_model_input_sizes[pretrained_model_name_or_path]
            if model_max_length is not None and isinstance(model_max_length, (int, float)):
-
                model_max_length = min(init_kwargs.get("model_max_length", int(1e30)), model_max_length)
                # TODO(PVP) - uncomment following line in Transformers v5
                # init_kwargs["model_max_length"] = model_max_length
@@ -2278,7 +2277,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        max_length: Optional[int] = None,
        stride: int = 0,
        return_tensors: Optional[Union[str, TensorType]] = None,
-        **kwargs
+        **kwargs,
    ) -> List[int]:
        """
        Converts a string to a sequence of ids (integer), using the tokenizer and vocabulary.
@@ -2474,7 +2473,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        """
        Main method to tokenize and prepare for the model one or several sequence(s) or one or several pair(s) of
@@ -2558,7 +2557,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        # Input type checking for clearer error
        def _is_valid_text_input(t):
@@ -2671,7 +2670,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        """
        Tokenize and prepare for the model a sequence or a pair of sequences.
@@ -2743,7 +2742,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        raise NotImplementedError

@@ -2773,7 +2772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        """
        Tokenize and prepare for the model a list of sequences or a list of pairs of sequences.
@@ -2846,7 +2845,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        raise NotImplementedError

@@ -3083,7 +3082,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        return_length: bool = False,
        verbose: bool = True,
        prepend_batch_axis: bool = False,
-        **kwargs
+        **kwargs,
    ) -> BatchEncoding:
        """
        Prepares a sequence of input id, or a pair of sequences of inputs ids so that it can be used by the model. It
@@ -3271,8 +3270,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
                )
                if truncation_strategy == TruncationStrategy.ONLY_FIRST:
                    error_msg = (
-                        error_msg
-                        + "Please select another truncation strategy than "
+                        error_msg + "Please select another truncation strategy than "
                        f"{truncation_strategy}, for instance 'longest_first' or 'only_second'."
                    )
                logger.error(error_msg)
@@ -3373,7 +3371,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):

            if self.padding_side == "right":
                if return_attention_mask:
-
                    encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"] + [0] * difference
                if "token_type_ids" in encoded_inputs:
                    encoded_inputs["token_type_ids"] = (
@@ -3415,7 +3412,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = True,
-        **kwargs
+        **kwargs,
    ) -> List[str]:
        """
        Convert a list of lists of token ids into a list of strings by calling decode.
@@ -3448,7 +3445,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = True,
-        **kwargs
+        **kwargs,
    ) -> str:
        """
        Converts a sequence of ids in a string, using the tokenizer and vocabulary with options to remove special
@@ -3484,7 +3481,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
        token_ids: Union[int, List[int]],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: bool = True,
-        **kwargs
+        **kwargs,
    ) -> str:
        raise NotImplementedError