From 448c467256332e4be8c122a159b482c1ef039b98 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 14 May 2020 13:14:26 -0400 Subject: [PATCH] Fix: unpin flake8 and fix cs errors (#4367) * Fix: unpin flake8 and fix cs errors * Ok we still need to quote those --- examples/benchmarks.py | 2 +- examples/distillation/distiller.py | 8 ++++---- examples/distillation/scripts/binarized_data.py | 2 +- examples/distillation/scripts/extract.py | 2 +- examples/distillation/scripts/extract_distilbert.py | 10 +++++----- examples/distillation/train.py | 4 ++-- setup.cfg | 2 +- setup.py | 2 +- src/transformers/convert_marian_to_pytorch.py | 2 +- src/transformers/data/datasets/glue.py | 2 +- src/transformers/data/datasets/language_modeling.py | 2 +- src/transformers/pipelines.py | 6 +++++- tests/test_tokenization_common.py | 12 +++++++++++- 13 files changed, 35 insertions(+), 21 deletions(-) diff --git a/examples/benchmarks.py b/examples/benchmarks.py index fb3f51d1c4..f215482999 100644 --- a/examples/benchmarks.py +++ b/examples/benchmarks.py @@ -478,7 +478,7 @@ def _compute_pytorch( dictionary[model_name]["memory"][batch_size][slice_size] = "N/A" if not no_speed: - print_fn("Going through model with sequence of shape".format(sequence.shape)) + print_fn("Going through model with sequence of shape {}".format(sequence.shape)) runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3) average_time = sum(runtimes) / float(len(runtimes)) / 3.0 dictionary[model_name]["time"][batch_size][slice_size] = average_time diff --git a/examples/distillation/distiller.py b/examples/distillation/distiller.py index 53669623b6..893d9916a9 100644 --- a/examples/distillation/distiller.py +++ b/examples/distillation/distiller.py @@ -80,7 +80,7 @@ class Distiller: self.mlm = params.mlm if self.mlm: - logger.info(f"Using MLM loss for LM step.") + logger.info("Using MLM loss for LM step.") self.mlm_mask_prop = params.mlm_mask_prop assert 0.0 <= self.mlm_mask_prop <= 1.0 
assert params.word_mask + params.word_keep + params.word_rand == 1.0 @@ -91,7 +91,7 @@ class Distiller: self.pred_probs = self.pred_probs.half() self.token_probs = self.token_probs.half() else: - logger.info(f"Using CLM loss for LM step.") + logger.info("Using CLM loss for LM step.") self.epoch = 0 self.n_iter = 0 @@ -365,8 +365,8 @@ class Distiller: self.end_epoch() if self.is_master: - logger.info(f"Save very last checkpoint as `pytorch_model.bin`.") - self.save_checkpoint(checkpoint_name=f"pytorch_model.bin") + logger.info("Save very last checkpoint as `pytorch_model.bin`.") + self.save_checkpoint(checkpoint_name="pytorch_model.bin") logger.info("Training is finished") def step(self, input_ids: torch.tensor, attention_mask: torch.tensor, lm_labels: torch.tensor): diff --git a/examples/distillation/scripts/binarized_data.py b/examples/distillation/scripts/binarized_data.py index 2dcca18396..8e34b29dcc 100644 --- a/examples/distillation/scripts/binarized_data.py +++ b/examples/distillation/scripts/binarized_data.py @@ -60,7 +60,7 @@ def main(): with open(args.file_path, "r", encoding="utf8") as fp: data = fp.readlines() - logger.info(f"Start encoding") + logger.info("Start encoding") logger.info(f"{len(data)} examples to process.") rslt = [] diff --git a/examples/distillation/scripts/extract.py b/examples/distillation/scripts/extract.py index 8d102c0cda..b4bea90d53 100644 --- a/examples/distillation/scripts/extract.py +++ b/examples/distillation/scripts/extract.py @@ -93,7 +93,7 @@ if __name__ == "__main__": elif args.model_type == "gpt2": for w in ["weight", "bias"]: compressed_sd[f"{prefix}.ln_f.{w}"] = state_dict[f"{prefix}.ln_f.{w}"] - compressed_sd[f"lm_head.weight"] = state_dict[f"lm_head.weight"] + compressed_sd["lm_head.weight"] = state_dict["lm_head.weight"] print(f"N layers selected for distillation: {std_idx}") print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}") diff --git 
a/examples/distillation/scripts/extract_distilbert.py b/examples/distillation/scripts/extract_distilbert.py index 972418b56b..d709268cf0 100644 --- a/examples/distillation/scripts/extract_distilbert.py +++ b/examples/distillation/scripts/extract_distilbert.py @@ -37,7 +37,7 @@ if __name__ == "__main__": model = BertForMaskedLM.from_pretrained(args.model_name) prefix = "bert" else: - raise ValueError(f'args.model_type should be "bert".') + raise ValueError('args.model_type should be "bert".') state_dict = model.state_dict() compressed_sd = {} @@ -78,12 +78,12 @@ if __name__ == "__main__": ] std_idx += 1 - compressed_sd[f"vocab_projector.weight"] = state_dict[f"cls.predictions.decoder.weight"] - compressed_sd[f"vocab_projector.bias"] = state_dict[f"cls.predictions.bias"] + compressed_sd["vocab_projector.weight"] = state_dict["cls.predictions.decoder.weight"] + compressed_sd["vocab_projector.bias"] = state_dict["cls.predictions.bias"] if args.vocab_transform: for w in ["weight", "bias"]: - compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"] - compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"] + compressed_sd[f"vocab_transform.{w}"] = state_dict[f"cls.predictions.transform.dense.{w}"] + compressed_sd[f"vocab_layer_norm.{w}"] = state_dict[f"cls.predictions.transform.LayerNorm.{w}"] print(f"N layers selected for distillation: {std_idx}") print(f"Number of params transfered for distillation: {len(compressed_sd.keys())}") diff --git a/examples/distillation/train.py b/examples/distillation/train.py index 670d03ea16..0d21ae04f8 100644 --- a/examples/distillation/train.py +++ b/examples/distillation/train.py @@ -273,7 +273,7 @@ def main(): token_probs = None train_lm_seq_dataset = LmSeqsDataset(params=args, data=data) - logger.info(f"Data loader created.") + logger.info("Data loader created.") # STUDENT # logger.info(f"Loading student config from {args.student_config}") @@ -288,7 +288,7 @@ def 
main(): if args.n_gpu > 0: student.to(f"cuda:{args.local_rank}") - logger.info(f"Student loaded.") + logger.info("Student loaded.") # TEACHER # teacher = teacher_model_class.from_pretrained(args.teacher_name, output_hidden_states=True) diff --git a/setup.cfg b/setup.cfg index 2a081a8acb..79c4d49e3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,5 +36,5 @@ multi_line_output = 3 use_parentheses = True [flake8] -ignore = E203, E501, W503 +ignore = E203, E501, E741, W503 max-line-length = 119 diff --git a/setup.py b/setup.py index 5f186ce000..c45abe8655 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ extras["docs"] = ["recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rt extras["quality"] = [ "black", "isort @ git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort", - "flake8==3.7.9", + "flake8", ] extras["dev"] = extras["testing"] + extras["quality"] + ["mecab-python3", "scikit-learn", "tensorflow", "torch"] diff --git a/src/transformers/convert_marian_to_pytorch.py b/src/transformers/convert_marian_to_pytorch.py index c140fafca4..bd58534ed3 100644 --- a/src/transformers/convert_marian_to_pytorch.py +++ b/src/transformers/convert_marian_to_pytorch.py @@ -226,7 +226,7 @@ def lmap(f, x) -> List: def fetch_test_set(test_set_url): import wget - fname = wget.download(test_set_url, f"opus_test.txt") + fname = wget.download(test_set_url, "opus_test.txt") lns = Path(fname).open().readlines() src = lmap(str.strip, lns[::4]) gold = lmap(str.strip, lns[1::4]) diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py index 3c963c2da3..944eb83a3a 100644 --- a/src/transformers/data/datasets/glue.py +++ b/src/transformers/data/datasets/glue.py @@ -114,7 +114,7 @@ class GlueDataset(Dataset): torch.save(self.features, cached_features_file) # ^ This seems to take a lot of time so I want to investigate why and how we can improve. 
logger.info( - f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start ) def __len__(self): diff --git a/src/transformers/data/datasets/language_modeling.py b/src/transformers/data/datasets/language_modeling.py index db2497ba49..6fae7b55c5 100644 --- a/src/transformers/data/datasets/language_modeling.py +++ b/src/transformers/data/datasets/language_modeling.py @@ -65,7 +65,7 @@ class TextDataset(Dataset): with open(cached_features_file, "wb") as handle: pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) logger.info( - f"Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start ) def __len__(self): diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py index 6f666c016a..52f1f64d16 100755 --- a/src/transformers/pipelines.py +++ b/src/transformers/pipelines.py @@ -24,7 +24,7 @@ from abc import ABC, abstractmethod from contextlib import contextmanager from itertools import chain from os.path import abspath, exists -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union import numpy as np @@ -58,6 +58,10 @@ if is_torch_available(): AutoModelWithLMHead, ) +if TYPE_CHECKING: + from .modeling_utils import PreTrainedModel + from .modeling_tf_utils import TFPreTrainedModel + logger = logging.getLogger(__name__) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index d92f143899..acad8d655f 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -19,11 +19,21 @@ import pickle import shutil import tempfile from collections import OrderedDict -from typing import Dict, Tuple, Union +from typing 
import TYPE_CHECKING, Dict, Tuple, Union from tests.utils import require_tf, require_torch +if TYPE_CHECKING: + from transformers import ( + PretrainedConfig, + PreTrainedTokenizer, + PreTrainedTokenizerFast, + PreTrainedModel, + TFPreTrainedModel, + ) + + def merge_model_tokenizer_mappings( model_mapping: Dict["PretrainedConfig", Union["PreTrainedModel", "TFPreTrainedModel"]], tokenizer_mapping: Dict["PretrainedConfig", Tuple["PreTrainedTokenizer", "PreTrainedTokenizerFast"]],