diff --git a/.circleci/config.yml b/.circleci/config.yml index 44dc19ea43..b4d27d2d63 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -101,7 +101,7 @@ jobs: # we need a version of isort with https://github.com/timothycrosley/isort/pull/1000 - run: sudo pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort - run: sudo pip install .[tf,torch,quality] - - run: black --check --line-length 119 examples templates tests src utils + - run: black --check --line-length 119 --target-version py35 examples templates tests src utils - run: isort --check-only --recursive examples templates tests src utils - run: flake8 examples templates tests src utils check_repository_consistency: diff --git a/Makefile b/Makefile index 6fccec8c21..dc2a6491ee 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,14 @@ # Check that source code meets quality standards quality: - black --check --line-length 119 examples templates tests src utils + black --check --line-length 119 --target-version py35 examples templates tests src utils isort --check-only --recursive examples templates tests src utils flake8 examples templates tests src utils # Format source code automatically style: - black --line-length 119 examples templates tests src utils + black --line-length 119 --target-version py35 examples templates tests src utils isort --recursive examples templates tests src utils # Run tests for the library diff --git a/src/transformers/modeling_encoder_decoder.py b/src/transformers/modeling_encoder_decoder.py index 9578b121f6..696b0fcad5 100644 --- a/src/transformers/modeling_encoder_decoder.py +++ b/src/transformers/modeling_encoder_decoder.py @@ -325,7 +325,7 @@ class Model2Model(PreTrainedEncoderDecoder): encoder_pretrained_model_name_or_path=pretrained_model_name_or_path, decoder_pretrained_model_name_or_path=pretrained_model_name_or_path, *args, - **kwargs + **kwargs, ) return model diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index b9c0adac38..df91ad011e 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -250,7 +250,7 @@ class TFPreTrainedModel(tf.keras.Model): return_unused_kwargs=True, force_download=force_download, resume_download=resume_download, - **kwargs + **kwargs, ) else: model_kwargs = kwargs diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 0c3b2ac980..115494f2f5 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -355,7 +355,7 @@ class PreTrainedModel(nn.Module): force_download=force_download, resume_download=resume_download, proxies=proxies, - **kwargs + **kwargs, ) else: model_kwargs = kwargs diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py index 3ce8ea72e4..09216ebd84 100755 --- a/src/transformers/pipelines.py +++ b/src/transformers/pipelines.py @@ -643,7 +643,7 @@ class QuestionAnsweringPipeline(Pipeline): framework=framework, args_parser=QuestionAnsweringArgumentHandler(), device=device, - **kwargs + **kwargs, ) @staticmethod diff --git a/src/transformers/tokenization_albert.py b/src/transformers/tokenization_albert.py index 0c06b48a49..f8d058a725 100644 --- a/src/transformers/tokenization_albert.py +++ b/src/transformers/tokenization_albert.py @@ -87,7 +87,7 @@ class AlbertTokenizer(PreTrainedTokenizer): pad_token=pad_token, cls_token=cls_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens diff --git a/src/transformers/tokenization_bert.py b/src/transformers/tokenization_bert.py index 4c2abc7d17..1e817c54a3 100644 --- a/src/transformers/tokenization_bert.py +++ b/src/transformers/tokenization_bert.py @@ -169,7 +169,7 @@ class BertTokenizer(PreTrainedTokenizer): pad_token=pad_token, cls_token=cls_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens @@ -560,7 +560,7 @@ class BertTokenizerFast(PreTrainedTokenizerFast): pad_token=pad_token, cls_token=cls_token, mask_token=mask_token, - **kwargs + **kwargs, ) self._tokenizer = tk.Tokenizer(tk.models.WordPiece.from_files(vocab_file, unk_token=unk_token)) diff --git a/src/transformers/tokenization_bert_japanese.py b/src/transformers/tokenization_bert_japanese.py index 57dde09734..346d12360c 100644 --- a/src/transformers/tokenization_bert_japanese.py +++ b/src/transformers/tokenization_bert_japanese.py @@ -113,7 +113,7 @@ class BertJapaneseTokenizer(BertTokenizer): pad_token=pad_token, cls_token=cls_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens diff --git a/src/transformers/tokenization_camembert.py b/src/transformers/tokenization_camembert.py index dd33d510da..038856be8b 100644 --- a/src/transformers/tokenization_camembert.py +++ b/src/transformers/tokenization_camembert.py @@ -76,7 +76,7 @@ class CamembertTokenizer(PreTrainedTokenizer): pad_token=pad_token, mask_token=mask_token, additional_special_tokens=additional_special_tokens, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens diff --git a/src/transformers/tokenization_roberta.py b/src/transformers/tokenization_roberta.py index 78d6cf956d..ed97058021 100644 --- a/src/transformers/tokenization_roberta.py +++ b/src/transformers/tokenization_roberta.py @@ -95,7 +95,7 @@ class RobertaTokenizer(GPT2Tokenizer): cls_token=cls_token, pad_token=pad_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens diff --git a/src/transformers/tokenization_t5.py b/src/transformers/tokenization_t5.py index 02c70fe5da..b010843d42 100644 --- a/src/transformers/tokenization_t5.py +++ b/src/transformers/tokenization_t5.py @@ -96,7 +96,7 @@ class T5Tokenizer(PreTrainedTokenizer): unk_token=unk_token, pad_token=pad_token, additional_special_tokens=additional_special_tokens, - **kwargs + **kwargs, ) try: diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 57bc068d58..4a583d9312 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -817,7 +817,7 @@ class PreTrainedTokenizer(object): truncation_strategy=truncation_strategy, pad_to_max_length=pad_to_max_length, return_tensors=return_tensors, - **kwargs + **kwargs, ) return encoded_inputs["input_ids"] diff --git a/src/transformers/tokenization_xlm.py b/src/transformers/tokenization_xlm.py index c59d28feee..fe15eba45d 100644 --- a/src/transformers/tokenization_xlm.py +++ b/src/transformers/tokenization_xlm.py @@ -586,7 +586,7 @@ class XLMTokenizer(PreTrainedTokenizer): cls_token=cls_token, mask_token=mask_token, additional_special_tokens=additional_special_tokens, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens diff --git a/src/transformers/tokenization_xlm_roberta.py b/src/transformers/tokenization_xlm_roberta.py index dde2382f8b..5fe624e6bd 100644 --- a/src/transformers/tokenization_xlm_roberta.py +++ b/src/transformers/tokenization_xlm_roberta.py @@ -83,7 +83,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer): cls_token=cls_token, pad_token=pad_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens diff --git a/src/transformers/tokenization_xlnet.py b/src/transformers/tokenization_xlnet.py index 38c2a00ff9..14f0d26b72 100644 --- a/src/transformers/tokenization_xlnet.py +++ b/src/transformers/tokenization_xlnet.py @@ -86,7 +86,7 @@ class XLNetTokenizer(PreTrainedTokenizer): cls_token=cls_token, mask_token=mask_token, additional_special_tokens=additional_special_tokens, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens diff --git a/templates/adding_a_new_model/tokenization_xxx.py b/templates/adding_a_new_model/tokenization_xxx.py index 2dd88d7287..dabb14be9a 100644 --- a/templates/adding_a_new_model/tokenization_xxx.py +++ b/templates/adding_a_new_model/tokenization_xxx.py @@ -115,7 +115,7 @@ class XxxTokenizer(PreTrainedTokenizer): pad_token=pad_token, cls_token=cls_token, mask_token=mask_token, - **kwargs + **kwargs, ) self.max_len_single_sentence = self.max_len - 2 # take into account special tokens self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens diff --git a/tests/test_tokenization_bert.py b/tests/test_tokenization_bert.py index 7af6cbee73..c83611206e 100644 --- a/tests/test_tokenization_bert.py +++ b/tests/test_tokenization_bert.py @@ -84,7 +84,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer(add_special_tokens=False) - sequence = u"UNwant\u00E9d,running" + sequence = "UNwant\u00E9d,running" tokens = tokenizer.tokenize(sequence) rust_tokens = rust_tokenizer.tokenize(sequence) diff --git a/tests/test_tokenization_gpt2.py b/tests/test_tokenization_gpt2.py index fdd8026a8f..1967b7a758 100644 --- a/tests/test_tokenization_gpt2.py +++ b/tests/test_tokenization_gpt2.py @@ -96,7 +96,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): tokenizer = self.get_tokenizer() rust_tokenizer = self.get_rust_tokenizer(add_special_tokens=False, add_prefix_space=True) - sequence = u"lower newer" + sequence = "lower newer" # Testing tokenization tokens = tokenizer.tokenize(sequence, add_prefix_space=True)