diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index e05cceb274..db595b6457 100755 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -213,7 +213,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py index 4740b7f79d..627618ff5d 100755 --- a/examples/language-modeling/run_mlm.py +++ b/examples/language-modeling/run_mlm.py @@ -223,7 +223,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/language-modeling/run_mlm_flax.py b/examples/language-modeling/run_mlm_flax.py index c2883118f7..6ab6764931 100755 --- a/examples/language-modeling/run_mlm_flax.py +++ b/examples/language-modeling/run_mlm_flax.py @@ -307,7 +307,7 @@ def create_learning_rate_scheduler( progress = jnp.maximum(0.0, (step - warmup_steps) / float(steps_per_cycle)) ret *= jnp.maximum(0.0, 0.5 * (1.0 + jnp.cos(jnp.pi * (progress % 1.0)))) else: - raise ValueError("Unknown factor %s." % name) + raise ValueError(f"Unknown factor {name}.") return jnp.asarray(ret, dtype=jnp.float32) return step_fn @@ -332,9 +332,7 @@ def accuracy(logits, targets, weights=None): Tuple of scalar loss and batch normalizing factor. """ if logits.ndim != targets.ndim + 1: - raise ValueError( - "Incorrect shapes. 
Got shape %s logits and %s targets" % (str(logits.shape), str(targets.shape)) - ) + raise ValueError(f"Incorrect shapes. Got shape {logits.shape} logits and {targets.shape} targets") loss = jnp.equal(jnp.argmax(logits, axis=-1), targets) loss *= weights @@ -353,9 +351,7 @@ def cross_entropy(logits, targets, weights=None, label_smoothing=0.0): Tuple of scalar loss and batch normalizing factor. """ if logits.ndim != targets.ndim + 1: - raise ValueError( - "Incorrect shapes. Got shape %s logits and %s targets" % (str(logits.shape), str(targets.shape)) - ) + raise ValueError(f"Incorrect shapes. Got shape {logits.shape} logits and {targets.shape} targets") vocab_size = logits.shape[-1] confidence = 1.0 - label_smoothing @@ -463,7 +459,7 @@ if __name__ == "__main__": ) # Set the verbosity to info of the Transformers logger (on main process only): - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py index 0936684d17..6048604c41 100755 --- a/examples/language-modeling/run_plm.py +++ b/examples/language-modeling/run_plm.py @@ -220,7 +220,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. 
set_seed(training_args.seed) diff --git a/examples/multiple-choice/run_swag.py b/examples/multiple-choice/run_swag.py index 02fd9e9161..10af91ee6a 100755 --- a/examples/multiple-choice/run_swag.py +++ b/examples/multiple-choice/run_swag.py @@ -247,7 +247,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/multiple-choice/run_tf_multiple_choice.py b/examples/multiple-choice/run_tf_multiple_choice.py index 5ff4e384d9..dec38bea34 100755 --- a/examples/multiple-choice/run_tf_multiple_choice.py +++ b/examples/multiple-choice/run_tf_multiple_choice.py @@ -116,12 +116,10 @@ def main(): level=logging.INFO, ) logger.warning( - "device: %s, n_replicas: %s, 16-bits training: %s", - training_args.device, - training_args.n_replicas, - training_args.fp16, + f"device: {training_args.device}, n_replicas: {training_args.n_replicas}, " + f"16-bits training: {training_args.fp16}" ) - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed set_seed(training_args.seed) @@ -131,7 +129,7 @@ def main(): label_list = processor.get_labels() num_labels = len(label_list) except KeyError: - raise ValueError("Task not found: %s" % (data_args.task_name)) + raise ValueError(f"Task not found: {data_args.task_name}") # Load pretrained model and tokenizer # @@ -210,8 +208,8 @@ def main(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") for key, value in result.items(): - logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") results.update(result) diff 
--git a/examples/multiple-choice/utils_multiple_choice.py b/examples/multiple-choice/utils_multiple_choice.py index 784a7578d3..b16f827f0d 100644 --- a/examples/multiple-choice/utils_multiple_choice.py +++ b/examples/multiple-choice/utils_multiple_choice.py @@ -99,13 +99,7 @@ if is_torch_available(): processor = processors[task]() cached_features_file = os.path.join( - data_dir, - "cached_{}_{}_{}_{}".format( - mode.value, - tokenizer.__class__.__name__, - str(max_seq_length), - task, - ), + data_dir, f"cached_{mode.value}_{tokenizer.__class__.__name__}_{max_seq_length}_{task}" ) # Make sure only the first process in distributed training processes the dataset, @@ -125,14 +119,14 @@ if is_torch_available(): examples = processor.get_test_examples(data_dir) else: examples = processor.get_train_examples(data_dir) - logger.info("Training examples: %s", len(examples)) + logger.info(f"Training examples: {len(examples)}") self.features = convert_examples_to_features( examples, label_list, max_seq_length, tokenizer, ) - logger.info("Saving features into cached file %s", cached_features_file) + logger.info(f"Saving features into cached file {cached_features_file}") torch.save(self.features, cached_features_file) def __len__(self): @@ -172,7 +166,7 @@ if is_tf_available(): examples = processor.get_test_examples(data_dir) else: examples = processor.get_train_examples(data_dir) - logger.info("Training examples: %s", len(examples)) + logger.info(f"Training examples: {len(examples)}") self.features = convert_examples_to_features( examples, @@ -184,7 +178,7 @@ if is_tf_available(): def gen(): for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"): if ex_index % 10000 == 0: - logger.info("Writing example %d of %d" % (ex_index, len(examples))) + logger.info(f"Writing example {ex_index} of {len(examples)}") yield ( { @@ -255,7 +249,7 @@ class RaceProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - 
logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") high = os.path.join(data_dir, "train/high") middle = os.path.join(data_dir, "train/middle") high = self._read_txt(high) @@ -264,7 +258,7 @@ class RaceProcessor(DataProcessor): def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") high = os.path.join(data_dir, "dev/high") middle = os.path.join(data_dir, "dev/middle") high = self._read_txt(high) @@ -273,7 +267,7 @@ class RaceProcessor(DataProcessor): def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} test".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} test") high = os.path.join(data_dir, "test/high") middle = os.path.join(data_dir, "test/middle") high = self._read_txt(high) @@ -298,7 +292,7 @@ class RaceProcessor(DataProcessor): """Creates examples for the training and dev sets.""" examples = [] for (_, data_raw) in enumerate(lines): - race_id = "%s-%s" % (set_type, data_raw["race_id"]) + race_id = f"{set_type}-{data_raw['race_id']}" article = data_raw["article"] for i in range(len(data_raw["answers"])): truth = str(ord(data_raw["answers"][i]) - ord("A")) @@ -322,17 +316,17 @@ class SynonymProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev") def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return 
self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test") @@ -368,17 +362,17 @@ class SwagProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev") def get_test_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") raise ValueError( "For swag testing, the input file does not contain a label column. It can not be tested in current code" "setting!" @@ -419,16 +413,16 @@ class ArcProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} train".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} train") return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train") def get_dev_examples(self, data_dir): """See base class.""" - logger.info("LOOKING AT {} dev".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} dev") return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev") def get_test_examples(self, data_dir): - logger.info("LOOKING AT {} test".format(data_dir)) + logger.info(f"LOOKING AT {data_dir} test") return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test") def get_labels(self): @@ -450,7 +444,7 @@ class ArcProcessor(DataProcessor): elif truth in "1234": return int(truth) - 1 else: - logger.info("truth ERROR! %s", str(truth)) + logger.info(f"truth ERROR! 
{truth}") return None examples = [] @@ -496,11 +490,11 @@ class ArcProcessor(DataProcessor): if type == "train": assert len(examples) > 1 assert examples[0].label is not None - logger.info("len examples: %s}", str(len(examples))) - logger.info("Three choices: %s", str(three_choice)) - logger.info("Five choices: %s", str(five_choice)) - logger.info("Other choices: %s", str(other_choices)) - logger.info("four choices: %s", str(four_choice)) + logger.info(f"len examples: {len(examples)}") + logger.info(f"Three choices: {three_choice}") + logger.info(f"Five choices: {five_choice}") + logger.info(f"Other choices: {other_choices}") + logger.info(f"four choices: {four_choice}") return examples @@ -520,7 +514,7 @@ def convert_examples_to_features( features = [] for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"): if ex_index % 10000 == 0: - logger.info("Writing example %d of %d" % (ex_index, len(examples))) + logger.info(f"Writing example {ex_index} of {len(examples)}") choices_inputs = [] for ending_idx, (context, ending) in enumerate(zip(example.contexts, example.endings)): text_a = context @@ -570,7 +564,7 @@ def convert_examples_to_features( for f in features[:2]: logger.info("*** Example ***") - logger.info("feature: %s" % f) + logger.info(f"feature: {f}") return features diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py index 6e4821b1ad..314d71578f 100755 --- a/examples/question-answering/run_qa.py +++ b/examples/question-answering/run_qa.py @@ -240,7 +240,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. 
set_seed(training_args.seed) diff --git a/examples/question-answering/run_qa_beam_search.py b/examples/question-answering/run_qa_beam_search.py index 6005a479f2..36bd9a0d75 100755 --- a/examples/question-answering/run_qa_beam_search.py +++ b/examples/question-answering/run_qa_beam_search.py @@ -239,7 +239,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/question-answering/run_tf_squad.py b/examples/question-answering/run_tf_squad.py index eb2f4089aa..0cad705433 100755 --- a/examples/question-answering/run_tf_squad.py +++ b/examples/question-answering/run_tf_squad.py @@ -148,12 +148,10 @@ def main(): level=logging.INFO, ) logger.info( - "n_replicas: %s, distributed training: %s, 16-bits training: %s", - training_args.n_replicas, - bool(training_args.n_replicas > 1), - training_args.fp16, + f"n_replicas: {training_args.n_replicas}, distributed training: {bool(training_args.n_replicas > 1)}, " + f"16-bits training: {training_args.fp16}" ) - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Prepare Question-Answering task # Load pretrained model and tokenizer diff --git a/examples/seq2seq/run_summarization.py b/examples/seq2seq/run_summarization.py index 2dd1a0719d..dc02f8c71d 100755 --- a/examples/seq2seq/run_summarization.py +++ b/examples/seq2seq/run_summarization.py @@ -294,7 +294,7 @@ def main(): # Set the verbosity to info of the Transformers logger (on main process only): if is_main_process(training_args.local_rank): transformers.utils.logging.set_verbosity_info() - logger.info("Training/evaluation parameters %s", training_args) + 
logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/seq2seq/run_translation.py b/examples/seq2seq/run_translation.py index 496b78fe4e..0755a53413 100755 --- a/examples/seq2seq/run_translation.py +++ b/examples/seq2seq/run_translation.py @@ -264,7 +264,7 @@ def main(): # Set the verbosity to info of the Transformers logger (on main process only): if is_main_process(training_args.local_rank): transformers.utils.logging.set_verbosity_info() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/examples/text-classification/run_tf_glue.py b/examples/text-classification/run_tf_glue.py index 1e162a9f5b..5b6df337e9 100755 --- a/examples/text-classification/run_tf_glue.py +++ b/examples/text-classification/run_tf_glue.py @@ -160,18 +160,16 @@ def main(): level=logging.INFO, ) logger.info( - "n_replicas: %s, distributed training: %s, 16-bits training: %s", - training_args.n_replicas, - bool(training_args.n_replicas > 1), - training_args.fp16, + f"n_replicas: {training_args.n_replicas}, distributed training: {bool(training_args.n_replicas > 1)}, " + f"16-bits training: {training_args.fp16}", ) - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") try: num_labels = glue_tasks_num_labels["mnli" if data_args.task_name == "mnli-mm" else data_args.task_name] output_mode = glue_output_modes[data_args.task_name] except KeyError: - raise ValueError("Task not found: %s" % (data_args.task_name)) + raise ValueError(f"Task not found: {data_args.task_name}") # Load pretrained model and tokenizer # @@ -255,8 +253,8 @@ def main(): logger.info("***** Eval results *****") for key, value in result.items(): - logger.info(" %s = %s", key, value) - writer.write("%s = 
%s\n" % (key, value)) + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") results.update(result) diff --git a/examples/text-classification/run_tf_text_classification.py b/examples/text-classification/run_tf_text_classification.py index 22fbb0f912..0b31ee30df 100755 --- a/examples/text-classification/run_tf_text_classification.py +++ b/examples/text-classification/run_tf_text_classification.py @@ -225,12 +225,10 @@ def main(): level=logging.INFO, ) logger.info( - "n_replicas: %s, distributed training: %s, 16-bits training: %s", - training_args.n_replicas, - bool(training_args.n_replicas > 1), - training_args.fp16, + f"n_replicas: {training_args.n_replicas}, distributed training: {bool(training_args.n_replicas > 1)}, " + f"16-bits training: {training_args.fp16}" ) - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Load pretrained model and tokenizer # @@ -300,8 +298,8 @@ def main(): logger.info("***** Eval results *****") for key, value in result.items(): - logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") results.update(result) diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index 56b1de051b..efb9578738 100755 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -201,12 +201,7 @@ def main(): args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count() - logger.warning( - "device: %s, n_gpu: %s, 16-bits training: %s", - args.device, - args.n_gpu, - args.fp16, - ) + logger.warning(f"device: {args.device}, n_gpu: {args.n_gpu}, 16-bits training: {args.fp16}") set_seed(args) @@ -271,7 +266,7 @@ def main(): generated_sequences = [] for generated_sequence_idx, generated_sequence 
in enumerate(output_sequences): - print("=== GENERATED SEQUENCE {} ===".format(generated_sequence_idx + 1)) + print(f"=== GENERATED SEQUENCE {generated_sequence_idx + 1} ===") generated_sequence = generated_sequence.tolist() # Decode text diff --git a/examples/token-classification/run_ner.py b/examples/token-classification/run_ner.py index 06004f62a2..053a193a60 100755 --- a/examples/token-classification/run_ner.py +++ b/examples/token-classification/run_ner.py @@ -213,7 +213,7 @@ def main(): transformers.utils.logging.set_verbosity_info() transformers.utils.logging.enable_default_handler() transformers.utils.logging.enable_explicit_format() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/src/transformers/activations.py b/src/transformers/activations.py index 2035b3f7f8..deade8c868 100644 --- a/src/transformers/activations.py +++ b/src/transformers/activations.py @@ -95,4 +95,4 @@ def get_activation(activation_string): if activation_string in ACT2FN: return ACT2FN[activation_string] else: - raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) + raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}") diff --git a/src/transformers/activations_tf.py b/src/transformers/activations_tf.py index 929dbb310a..583d359189 100644 --- a/src/transformers/activations_tf.py +++ b/src/transformers/activations_tf.py @@ -91,4 +91,4 @@ def get_tf_activation(activation_string): if activation_string in ACT2FN: return ACT2FN[activation_string] else: - raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) + raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}") diff --git a/src/transformers/benchmark/benchmark.py 
b/src/transformers/benchmark/benchmark.py index d9b17870f9..f64fb88845 100644 --- a/src/transformers/benchmark/benchmark.py +++ b/src/transformers/benchmark/benchmark.py @@ -218,7 +218,7 @@ class PyTorchBenchmark(Benchmark): return min(runtimes) / 10.0 except RuntimeError as e: - self.print_fn("Doesn't fit on GPU. {}".format(e)) + self.print_fn(f"Doesn't fit on GPU. {e}") return "N/A" def _measure_memory(self, func: Callable[[], None]) -> [Memory, MemorySummary]: @@ -263,5 +263,5 @@ class PyTorchBenchmark(Benchmark): return memory, summary except RuntimeError as e: - self.print_fn("Doesn't fit on GPU. {}".format(e)) + self.print_fn(f"Doesn't fit on GPU. {e}") return "N/A", None diff --git a/src/transformers/benchmark/benchmark_tf.py b/src/transformers/benchmark/benchmark_tf.py index 030c0d2215..7495d449ed 100644 --- a/src/transformers/benchmark/benchmark_tf.py +++ b/src/transformers/benchmark/benchmark_tf.py @@ -227,7 +227,7 @@ class TensorFlowBenchmark(Benchmark): return min(runtimes) / 10.0 except ResourceExhaustedError as e: - self.print_fn("Doesn't fit on GPU. {}".format(e)) + self.print_fn(f"Doesn't fit on GPU. {e}") def _measure_memory(self, func: Callable[[], None]) -> [Memory, MemorySummary]: logger.info( @@ -290,5 +290,5 @@ class TensorFlowBenchmark(Benchmark): return memory, summary except ResourceExhaustedError as e: - self.print_fn("Doesn't fit on GPU. {}".format(e)) + self.print_fn(f"Doesn't fit on GPU. 
{e}") return "N/A", None diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index 5b054614c3..87d8ec986e 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -758,9 +758,7 @@ class Benchmark(ABC): if self.args.env_print: self.print_fn("\n" + 20 * "=" + ("ENVIRONMENT INFORMATION").center(40) + 20 * "=") - self.print_fn( - "\n".join(["- {}: {}".format(prop, val) for prop, val in self.environment_info.items()]) + "\n" - ) + self.print_fn("\n".join([f"- {prop}: {val}" for prop, val in self.environment_info.items()]) + "\n") if self.args.save_to_csv: with open(self.args.env_info_csv_file, mode="w", newline="") as csv_file: @@ -888,9 +886,7 @@ class Benchmark(ABC): self.print_fn("Saving results to csv.") with open(filename, mode="w") as csv_file: - assert len(self.args.model_names) > 0, "At least 1 model should be defined, but got {}".format( - self.model_names - ) + assert len(self.args.model_names) > 0, f"At least 1 model should be defined, but got {self.model_names}" fieldnames = ["model", "batch_size", "sequence_length"] writer = csv.DictWriter(csv_file, fieldnames=fieldnames + ["result"]) diff --git a/src/transformers/commands/convert.py b/src/transformers/commands/convert.py index 6867cf6c01..2ca5a57ca3 100644 --- a/src/transformers/commands/convert.py +++ b/src/transformers/commands/convert.py @@ -76,7 +76,7 @@ class ConvertCommand(BaseTransformersCLICommand): ): self._logger = logging.get_logger("transformers-cli/converting") - self._logger.info("Loading model {}".format(model_type)) + self._logger.info(f"Loading model {model_type}") self._model_type = model_type self._tf_checkpoint = tf_checkpoint self._pytorch_dump_output = pytorch_dump_output diff --git a/src/transformers/commands/env.py b/src/transformers/commands/env.py index beee192ab4..0a8c2b1b60 100644 --- a/src/transformers/commands/env.py +++ b/src/transformers/commands/env.py @@ -56,8 
+56,8 @@ class EnvironmentCommand(BaseTransformersCLICommand): "`transformers` version": version, "Platform": platform.platform(), "Python version": platform.python_version(), - "PyTorch version (GPU?)": "{} ({})".format(pt_version, pt_cuda_available), - "Tensorflow version (GPU?)": "{} ({})".format(tf_version, tf_cuda_available), + "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", + "Tensorflow version (GPU?)": f"{tf_version} ({tf_cuda_available})", "Using GPU in script?": "", "Using distributed or parallel set-up in script?": "", } @@ -69,4 +69,4 @@ class EnvironmentCommand(BaseTransformersCLICommand): @staticmethod def format_dict(d): - return "\n".join(["- {}: {}".format(prop, val) for prop, val in d.items()]) + "\n" + return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" diff --git a/src/transformers/commands/run.py b/src/transformers/commands/run.py index 768b90007a..856ac6d12d 100644 --- a/src/transformers/commands/run.py +++ b/src/transformers/commands/run.py @@ -31,8 +31,8 @@ def try_infer_format_from_ext(path: str): return ext raise Exception( - "Unable to determine file format from file extension {}. " - "Please provide the format through --format {}".format(path, PipelineDataFormat.SUPPORTED_FORMATS) + f"Unable to determine file format from file extension {path}. 
" + f"Please provide the format through --format {PipelineDataFormat.SUPPORTED_FORMATS}" ) @@ -105,6 +105,6 @@ class RunCommand(BaseTransformersCLICommand): # Saving data if self._nlp.binary_output: binary_path = self._reader.save_binary(outputs) - logger.warning("Current pipeline requires output to be in binary format, saving at {}".format(binary_path)) + logger.warning(f"Current pipeline requires output to be in binary format, saving at {binary_path}") else: self._reader.save(outputs) diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index 7bef8d5eeb..cb4a3fe6c1 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -133,7 +133,7 @@ class ServeCommand(BaseTransformersCLICommand): "Or install FastAPI and unicorn separately." ) else: - logger.info("Serving model over {}:{}".format(host, port)) + logger.info(f"Serving model over {host}:{port}") self._app = FastAPI( routes=[ APIRoute( diff --git a/src/transformers/commands/train.py b/src/transformers/commands/train.py index a2d3029221..03c8547ed1 100644 --- a/src/transformers/commands/train.py +++ b/src/transformers/commands/train.py @@ -104,7 +104,7 @@ class TrainCommand(BaseTransformersCLICommand): self.column_text = args.column_text self.column_id = args.column_id - self.logger.info("Loading {} pipeline for {}".format(args.task, args.model)) + self.logger.info(f"Loading {args.task} pipeline for {args.model}") if args.task == "text_classification": self.pipeline = TextClassificationPipeline.from_pretrained(args.model) elif args.task == "token_classification": @@ -112,7 +112,7 @@ class TrainCommand(BaseTransformersCLICommand): elif args.task == "question_answering": raise NotImplementedError - self.logger.info("Loading dataset from {}".format(args.train_data)) + self.logger.info(f"Loading dataset from {args.train_data}") self.train_dataset = Processor.create_from_csv( args.train_data, column_label=args.column_label, @@ -122,7 +122,7 @@ 
class TrainCommand(BaseTransformersCLICommand): ) self.valid_dataset = None if args.validation_data: - self.logger.info("Loading validation dataset from {}".format(args.validation_data)) + self.logger.info(f"Loading validation dataset from {args.validation_data}") self.valid_dataset = Processor.create_from_csv( args.validation_data, column_label=args.column_label, diff --git a/src/transformers/commands/user.py b/src/transformers/commands/user.py index 9a16dec22b..1245084bb9 100644 --- a/src/transformers/commands/user.py +++ b/src/transformers/commands/user.py @@ -99,15 +99,15 @@ class ANSI: @classmethod def bold(cls, s): - return "{}{}{}".format(cls._bold, s, cls._reset) + return f"{cls._bold}{s}{cls._reset}" @classmethod def red(cls, s): - return "{}{}{}".format(cls._bold + cls._red, s, cls._reset) + return f"{cls._bold}{cls._red}{s}{cls._reset}" @classmethod def gray(cls, s): - return "{}{}{}".format(cls._gray, s, cls._reset) + return f"{cls._gray}{s}{cls._reset}" def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: @@ -268,8 +268,8 @@ class RepoCreateCommand(BaseUserCommand): user, _ = self._api.whoami(token) namespace = self.args.organization if self.args.organization is not None else user - - print("You are about to create {}".format(ANSI.bold(namespace + "/" + self.args.name))) + full_name = f"{namespace}/{self.args.name}" + print(f"You are about to create {ANSI.bold(full_name)}") if not self.args.yes: choice = input("Proceed? 
[Y/n] ").lower() @@ -283,7 +283,7 @@ class RepoCreateCommand(BaseUserCommand): print(ANSI.red(e.response.text)) exit(1) print("\nYour repo now lives at:") - print(" {}".format(ANSI.bold(url))) + print(f" {ANSI.bold(url)}") print("\nYou can clone it locally with the command below," " and commit/push as usual.") print(f"\n git clone {url}") print("") @@ -328,16 +328,15 @@ class UploadCommand(BaseUserCommand): filename = self.args.filename if self.args.filename is not None else os.path.basename(local_path) files = [(local_path, filename)] else: - raise ValueError("Not a valid file or directory: {}".format(local_path)) + raise ValueError(f"Not a valid file or directory: {local_path}") if sys.platform == "win32": files = [(filepath, filename.replace(os.sep, "/")) for filepath, filename in files] if len(files) > UPLOAD_MAX_FILES: print( - "About to upload {} files to S3. This is probably wrong. Please filter files before uploading.".format( - ANSI.bold(len(files)) - ) + f"About to upload {ANSI.bold(len(files))} files to S3. This is probably wrong. Please filter files " + "before uploading." 
) exit(1) @@ -346,9 +345,8 @@ class UploadCommand(BaseUserCommand): for filepath, filename in files: print( - "About to upload file {} to S3 under filename {} and namespace {}".format( - ANSI.bold(filepath), ANSI.bold(filename), ANSI.bold(namespace) - ) + f"About to upload file {ANSI.bold(filepath)} to S3 under filename {ANSI.bold(filename)} and namespace " + f"{ANSI.bold(namespace)}" ) if not self.args.yes: diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index d0631d3f1a..621f855a12 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -267,7 +267,7 @@ class PretrainedConfig(object): try: setattr(self, key, value) except AttributeError as err: - logger.error("Can't set {} with value {} for {}".format(key, value, self)) + logger.error(f"Can't set {key} with value {value} for {self}") raise err @property @@ -296,7 +296,7 @@ class PretrainedConfig(object): @num_labels.setter def num_labels(self, num_labels: int): if self.id2label is None or len(self.id2label) != num_labels: - self.id2label = {i: "LABEL_{}".format(i) for i in range(num_labels)} + self.id2label = {i: f"LABEL_{i}" for i in range(num_labels)} self.label2id = dict(zip(self.id2label.values(), self.id2label.keys())) def save_pretrained(self, save_directory: Union[str, os.PathLike]): @@ -309,7 +309,7 @@ class PretrainedConfig(object): Directory where the configuration JSON file will be saved (will be created if it does not exist). 
""" if os.path.isfile(save_directory): - raise AssertionError("Provided path ({}) should be a directory, not a file".format(save_directory)) + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") os.makedirs(save_directory, exist_ok=True) # If we save using the predefined names, we can load using `from_pretrained` output_config_file = os.path.join(save_directory, CONFIG_NAME) @@ -467,16 +467,16 @@ class PretrainedConfig(object): except json.JSONDecodeError: msg = ( - "Couldn't reach server at '{}' to download configuration file or " + f"Couldn't reach server at '{config_file}' to download configuration file or " "configuration file is not a valid JSON file. " - "Please check network or file content here: {}.".format(config_file, resolved_config_file) + f"Please check network or file content here: {resolved_config_file}." ) raise EnvironmentError(msg) if resolved_config_file == config_file: - logger.info("loading configuration file {}".format(config_file)) + logger.info(f"loading configuration file {config_file}") else: - logger.info("loading configuration file {} from cache at {}".format(config_file, resolved_config_file)) + logger.info(f"loading configuration file {config_file} from cache at {resolved_config_file}") return config_dict, kwargs @@ -512,7 +512,7 @@ class PretrainedConfig(object): for key in to_remove: kwargs.pop(key, None) - logger.info("Model config %s", str(config)) + logger.info(f"Model config {config}") if return_unused_kwargs: return config, kwargs else: @@ -544,7 +544,7 @@ class PretrainedConfig(object): return self.__dict__ == other.__dict__ def __repr__(self): - return "{} {}".format(self.__class__.__name__, self.to_json_string()) + return f"{self.__class__.__name__} {self.to_json_string()}" def to_diff_dict(self) -> Dict[str, Any]: """ diff --git a/src/transformers/convert_graph_to_onnx.py b/src/transformers/convert_graph_to_onnx.py index 8db247f2cf..47fd6ca329 100644 --- 
a/src/transformers/convert_graph_to_onnx.py +++ b/src/transformers/convert_graph_to_onnx.py @@ -154,7 +154,7 @@ def ensure_valid_input(model, tokens, input_names): print(f"{arg_name} is not present in the generated input list.") break - print("Generated inputs order: {}".format(ordered_input_names)) + print(f"Generated inputs order: {ordered_input_names}") return ordered_input_names, tuple(model_args) diff --git a/src/transformers/convert_pytorch_checkpoint_to_tf2.py b/src/transformers/convert_pytorch_checkpoint_to_tf2.py index 3b8450e0e9..87420d6f0c 100755 --- a/src/transformers/convert_pytorch_checkpoint_to_tf2.py +++ b/src/transformers/convert_pytorch_checkpoint_to_tf2.py @@ -294,7 +294,7 @@ def convert_pt_checkpoint_to_tf( model_type, pytorch_checkpoint_path, config_file, tf_dump_path, compare_with_pt_model=False, use_cached_models=True ): if model_type not in MODEL_CLASSES: - raise ValueError("Unrecognized model type, should be one of {}.".format(list(MODEL_CLASSES.keys()))) + raise ValueError(f"Unrecognized model type, should be one of {list(MODEL_CLASSES.keys())}.") config_class, model_class, pt_model_class, aws_config_map = MODEL_CLASSES[model_type] @@ -304,7 +304,7 @@ def convert_pt_checkpoint_to_tf( config = config_class.from_json_file(config_file) config.output_hidden_states = True config.output_attentions = True - print("Building TensorFlow model from configuration: {}".format(str(config))) + print(f"Building TensorFlow model from configuration: {config}") tf_model = model_class(config) # Load weights from tf checkpoint @@ -328,11 +328,11 @@ def convert_pt_checkpoint_to_tf( np_pt = pto[0].numpy() np_tf = tfo[0].numpy() diff = np.amax(np.abs(np_pt - np_tf)) - print("Max absolute difference between models outputs {}".format(diff)) - assert diff <= 2e-2, "Error, model absolute difference is >2e-2: {}".format(diff) + print(f"Max absolute difference between models outputs {diff}") + assert diff <= 2e-2, f"Error, model absolute difference is >2e-2: {diff}" # 
Save pytorch-model - print("Save TensorFlow model to {}".format(tf_dump_path)) + print(f"Save TensorFlow model to {tf_dump_path}") tf_model.save_weights(tf_dump_path, save_format="h5") @@ -354,12 +354,10 @@ def convert_all_pt_checkpoints_to_tf( for j, model_type in enumerate(model_types, start=1): print("=" * 100) - print(" Converting model type {}/{}: {}".format(j, len(model_types), model_type)) + print(f" Converting model type {j}/{len(model_types)}: {model_type}") print("=" * 100) if model_type not in MODEL_CLASSES: - raise ValueError( - "Unrecognized model type {}, should be one of {}.".format(model_type, list(MODEL_CLASSES.keys())) - ) + raise ValueError(f"Unrecognized model type {model_type}, should be one of {list(MODEL_CLASSES.keys())}.") config_class, model_class, pt_model_class, aws_model_maps, aws_config_map = MODEL_CLASSES[model_type] @@ -374,16 +372,14 @@ def convert_all_pt_checkpoints_to_tf( print("-" * 100) if "-squad" in model_shortcut_name or "-mrpc" in model_shortcut_name or "-mnli" in model_shortcut_name: if not only_convert_finetuned_models: - print(" Skipping finetuned checkpoint {}".format(model_shortcut_name)) + print(f" Skipping finetuned checkpoint {model_shortcut_name}") continue model_type = model_shortcut_name elif only_convert_finetuned_models: - print(" Skipping not finetuned checkpoint {}".format(model_shortcut_name)) + print(f" Skipping not finetuned checkpoint {model_shortcut_name}") continue print( - " Converting checkpoint {}/{}: {} - model_type {}".format( - i, len(aws_config_map), model_shortcut_name, model_type - ) + f" Converting checkpoint {i}/{len(aws_config_map)}: {model_shortcut_name} - model_type {model_type}" ) print("-" * 100) @@ -422,9 +418,8 @@ if __name__ == "__main__": "--model_type", default=None, type=str, - help="Model type selected in the list of {}. 
If not given, will download and convert all the models from AWS.".format( - list(MODEL_CLASSES.keys()) - ), + help=f"Model type selected in the list of {list(MODEL_CLASSES.keys())}. If not given, will download and " + "convert all the models from AWS.", ) parser.add_argument( "--pytorch_checkpoint_path", diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py index 032ed51d5f..e98c635d04 100644 --- a/src/transformers/convert_slow_tokenizer.py +++ b/src/transformers/convert_slow_tokenizer.py @@ -633,7 +633,7 @@ class T5Converter(SpmConverter): def vocab(self, proto): num_extra_ids = self.original_tokenizer._extra_ids vocab = [(piece.piece, piece.score) for piece in proto.pieces] - vocab += [("<extra_id_{}>".format(i), 0.0) for i in range(num_extra_ids - 1, -1, -1)] + vocab += [(f"<extra_id_{i}>", 0.0) for i in range(num_extra_ids - 1, -1, -1)] return vocab def post_processor(self): diff --git a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py index d78608633e..208ecb640c 100755 --- a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py +++ b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py @@ -33,7 +33,7 @@ TOKENIZER_CLASSES = {name: getattr(transformers, name + "Fast") for name in SLOW def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dump_path, force_download): if tokenizer_name is not None and tokenizer_name not in TOKENIZER_CLASSES: - raise ValueError("Unrecognized tokenizer name, should be one of {}.".format(list(TOKENIZER_CLASSES.keys()))) + raise ValueError(f"Unrecognized tokenizer name, should be one of {list(TOKENIZER_CLASSES.keys())}.") if tokenizer_name is None: tokenizer_names = TOKENIZER_CLASSES @@ -60,9 +60,7 @@ def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dump_path, tokenizer = tokenizer_class.from_pretrained(checkpoint, force_download=force_download) # Save fast tokenizer - 
logger.info( - "Save fast tokenizer to {} with prefix {} add_prefix {}".format(dump_path, checkpoint, add_prefix) - ) + logger.info(f"Save fast tokenizer to {dump_path} with prefix {checkpoint} add_prefix {add_prefix}") # For organization names we create sub-directories if "/" in checkpoint: @@ -75,9 +73,7 @@ def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dump_path, checkpoint_prefix_name = None dump_path_full = dump_path - logger.info( - "=> {} with prefix {}, add_prefix {}".format(dump_path_full, checkpoint_prefix_name, add_prefix) - ) + logger.info(f"=> {dump_path_full} with prefix {checkpoint_prefix_name}, add_prefix {add_prefix}") if checkpoint in list(tokenizer.pretrained_vocab_files_map.values())[0]: file_path = list(tokenizer.pretrained_vocab_files_map.values())[0][checkpoint] @@ -86,19 +82,17 @@ def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dump_path, dump_path_full = os.path.join(dump_path_full, checkpoint_prefix_name) checkpoint_prefix_name = None - logger.info( - "=> {} with prefix {}, add_prefix {}".format(dump_path_full, checkpoint_prefix_name, add_prefix) - ) + logger.info(f"=> {dump_path_full} with prefix {checkpoint_prefix_name}, add_prefix {add_prefix}") file_names = tokenizer.save_pretrained( dump_path_full, legacy_format=False, filename_prefix=checkpoint_prefix_name ) - logger.info("=> File names {}".format(file_names)) + logger.info(f"=> File names {file_names}") for file_name in file_names: if not file_name.endswith("tokenizer.json"): os.remove(file_name) - logger.info("=> removing {}".format(file_name)) + logger.info(f"=> removing {file_name}") if __name__ == "__main__": @@ -111,9 +105,8 @@ if __name__ == "__main__": "--tokenizer_name", default=None, type=str, - help="Optional tokenizer type selected in the list of {}. 
If not given, will download and convert all the checkpoints from AWS.".format( - list(TOKENIZER_CLASSES.keys()) - ), + help=f"Optional tokenizer type selected in the list of {list(TOKENIZER_CLASSES.keys())}. If not given, will " + "download and convert all the checkpoints from AWS.", ) parser.add_argument( "--checkpoint_name", diff --git a/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py b/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py index 5707a09977..9be405f471 100755 --- a/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py +++ b/src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py @@ -46,7 +46,7 @@ def convert_tf_checkpoint_to_pytorch(tf_hub_path, pytorch_dump_path, is_encoder_ model = BertGenerationEncoder(config) else: model = BertGenerationDecoder(config) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") # Load weights from tf checkpoint load_tf_weights_in_bert_generation( @@ -58,7 +58,7 @@ def convert_tf_checkpoint_to_pytorch(tf_hub_path, pytorch_dump_path, is_encoder_ ) # Save pytorch-model - print("Save PyTorch model and config to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model and config to {pytorch_dump_path}") model.save_pretrained(pytorch_dump_path) diff --git a/src/transformers/data/datasets/glue.py b/src/transformers/data/datasets/glue.py index 68df53acb3..2409dfa34e 100644 --- a/src/transformers/data/datasets/glue.py +++ b/src/transformers/data/datasets/glue.py @@ -101,12 +101,7 @@ class GlueDataset(Dataset): # Load data features from cache or dataset file cached_features_file = os.path.join( cache_dir if cache_dir is not None else args.data_dir, - "cached_{}_{}_{}_{}".format( - mode.value, - tokenizer.__class__.__name__, - str(args.max_seq_length), - args.task_name, - ), + f"cached_{mode.value}_{tokenizer.__class__.__name__}_{args.max_seq_length}_{args.task_name}", ) label_list = 
self.processor.get_labels() if args.task_name in ["mnli", "mnli-mm"] and tokenizer.__class__.__name__ in ( @@ -153,7 +148,7 @@ class GlueDataset(Dataset): torch.save(self.features, cached_features_file) # ^ This seems to take a lot of time so I want to investigate why and how we can improve. logger.info( - "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + f"Saving features into cached file {cached_features_file} [took {time.time() - start:.3f} s]" ) def __len__(self): diff --git a/src/transformers/data/datasets/language_modeling.py b/src/transformers/data/datasets/language_modeling.py index f9c3811539..10afcaf6e7 100644 --- a/src/transformers/data/datasets/language_modeling.py +++ b/src/transformers/data/datasets/language_modeling.py @@ -64,11 +64,7 @@ class TextDataset(Dataset): directory, filename = os.path.split(file_path) cached_features_file = os.path.join( cache_dir if cache_dir is not None else directory, - "cached_lm_{}_{}_{}".format( - tokenizer.__class__.__name__, - str(block_size), - filename, - ), + f"cached_lm_{tokenizer.__class__.__name__}_{block_size}_{filename}", ) # Make sure only the first process in distributed training processes the dataset, @@ -105,7 +101,7 @@ class TextDataset(Dataset): with open(cached_features_file, "wb") as handle: pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) logger.info( - "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + f"Saving features into cached file {cached_features_file} [took {time.time() - start:.3f} s]" ) def __len__(self): @@ -131,7 +127,7 @@ class LineByLineTextDataset(Dataset): # Here, we do not cache the features, operating under the assumption # that we will soon use fast multithreaded tokenizers from the # `tokenizers` repo everywhere =) - logger.info("Creating features from dataset file at %s", file_path) + logger.info(f"Creating features from dataset file at {file_path}") with 
open(file_path, encoding="utf-8") as f: lines = [line for line in f.read().splitlines() if (len(line) > 0 and not line.isspace())] @@ -164,8 +160,8 @@ class LineByLineWithRefDataset(Dataset): # Here, we do not cache the features, operating under the assumption # that we will soon use fast multithreaded tokenizers from the # `tokenizers` repo everywhere =) - logger.info("Creating features from dataset file at %s", file_path) - logger.info("Use ref segment results at %s", ref_path) + logger.info(f"Creating features from dataset file at {file_path}") + logger.info(f"Use ref segment results at {ref_path}") with open(file_path, encoding="utf-8") as f: data = f.readlines() # use this method to avoid delimiter '\u2029' to split a line data = [line.strip() for line in data if len(line) > 0 and not line.isspace()] @@ -365,11 +361,7 @@ class TextDatasetForNextSentencePrediction(Dataset): directory, filename = os.path.split(file_path) cached_features_file = os.path.join( directory, - "cached_nsp_{}_{}_{}".format( - tokenizer.__class__.__name__, - str(block_size), - filename, - ), + f"cached_nsp_{tokenizer.__class__.__name__}_{block_size}_{filename}", ) self.tokenizer = tokenizer @@ -427,7 +419,7 @@ class TextDatasetForNextSentencePrediction(Dataset): with open(cached_features_file, "wb") as handle: pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) logger.info( - "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + f"Saving features into cached file {cached_features_file} [took {time.time() - start:.3f} s]" ) def create_examples_from_document(self, document: List[List[int]], doc_index: int): diff --git a/src/transformers/data/datasets/squad.py b/src/transformers/data/datasets/squad.py index db8c6ec26e..00f433e4a3 100644 --- a/src/transformers/data/datasets/squad.py +++ b/src/transformers/data/datasets/squad.py @@ -131,12 +131,7 @@ class SquadDataset(Dataset): version_tag = "v2" if args.version_2_with_negative else 
"v1" cached_features_file = os.path.join( cache_dir if cache_dir is not None else args.data_dir, - "cached_{}_{}_{}_{}".format( - mode.value, - tokenizer.__class__.__name__, - str(args.max_seq_length), - version_tag, - ), + f"cached_{mode.value}_{tokenizer.__class__.__name__}_{args.max_seq_length}_{version_tag}", ) # Make sure only the first process in distributed training processes the dataset, @@ -184,7 +179,7 @@ class SquadDataset(Dataset): ) # ^ This seems to take a lot of time so I want to investigate why and how we can improve. logger.info( - "Saving features into cached file %s [took %.3f s]", cached_features_file, time.time() - start + f"Saving features into cached file {cached_features_file} [took {time.time() - start:.3f} s]" ) def __len__(self): diff --git a/src/transformers/data/metrics/squad_metrics.py b/src/transformers/data/metrics/squad_metrics.py index 94ce573f75..f55e827f07 100644 --- a/src/transformers/data/metrics/squad_metrics.py +++ b/src/transformers/data/metrics/squad_metrics.py @@ -96,7 +96,7 @@ def get_raw_scores(examples, preds): gold_answers = [""] if qas_id not in preds: - print("Missing prediction for %s" % qas_id) + print(f"Missing prediction for {qas_id}") continue prediction = preds[qas_id] @@ -140,7 +140,7 @@ def make_eval_dict(exact_scores, f1_scores, qid_list=None): def merge_eval(main_eval, new_eval, prefix): for k in new_eval: - main_eval["%s_%s" % (prefix, k)] = new_eval[k] + main_eval[f"{prefix}_{k}"] = new_eval[k] def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans): @@ -302,7 +302,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False): start_position = tok_text.find(pred_text) if start_position == -1: if verbose_logging: - logger.info("Unable to find text: '%s' in '%s'" % (pred_text, orig_text)) + logger.info(f"Unable to find text: '{pred_text}' in '{orig_text}'") return orig_text end_position = start_position + len(pred_text) - 1 @@ -311,7 +311,7 @@ def get_final_text(pred_text, 
orig_text, do_lower_case, verbose_logging=False): if len(orig_ns_text) != len(tok_ns_text): if verbose_logging: - logger.info("Length not equal after stripping spaces: '%s' vs '%s'", orig_ns_text, tok_ns_text) + logger.info(f"Length not equal after stripping spaces: '{orig_ns_text}' vs '{tok_ns_text}'") return orig_text # We then project the characters in `pred_text` back to `orig_text` using @@ -615,8 +615,7 @@ def compute_predictions_log_probs( "NbestPrediction", ["text", "start_log_prob", "end_log_prob"] ) - logger.info("Writing predictions to: %s", output_prediction_file) - # logger.info("Writing nbest to: %s" % (output_nbest_file)) + logger.info(f"Writing predictions to: {output_prediction_file}") example_index_to_features = collections.defaultdict(list) for feature in all_features: diff --git a/src/transformers/data/processors/glue.py b/src/transformers/data/processors/glue.py index 0e1f244305..d130a337c2 100644 --- a/src/transformers/data/processors/glue.py +++ b/src/transformers/data/processors/glue.py @@ -122,10 +122,10 @@ def _glue_convert_examples_to_features( processor = glue_processors[task]() if label_list is None: label_list = processor.get_labels() - logger.info("Using label list %s for task %s" % (label_list, task)) + logger.info(f"Using label list {label_list} for task {task}") if output_mode is None: output_mode = glue_output_modes[task] - logger.info("Using output mode %s for task %s" % (output_mode, task)) + logger.info(f"Using output mode {output_mode} for task {task}") label_map = {label: i for i, label in enumerate(label_list)} @@ -156,8 +156,8 @@ def _glue_convert_examples_to_features( for i, example in enumerate(examples[:5]): logger.info("*** Example ***") - logger.info("guid: %s" % (example.guid)) - logger.info("features: %s" % features[i]) + logger.info(f"guid: {example.guid}") + logger.info(f"features: {features[i]}") return features @@ -185,7 +185,7 @@ class MrpcProcessor(DataProcessor): def get_train_examples(self, data_dir): """See 
base class.""" - logger.info("LOOKING AT {}".format(os.path.join(data_dir, "train.tsv"))) + logger.info(f"LOOKING AT {os.path.join(data_dir, 'train.tsv')}") return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") def get_dev_examples(self, data_dir): @@ -206,7 +206,7 @@ class MrpcProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, i) + guid = f"{set_type}-{i}" text_a = line[3] text_b = line[4] label = None if set_type == "test" else line[0] @@ -252,7 +252,7 @@ class MnliProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" text_a = line[8] text_b = line[9] label = None if set_type.startswith("test") else line[-1] @@ -316,7 +316,7 @@ class ColaProcessor(DataProcessor): text_index = 1 if test_mode else 3 examples = [] for (i, line) in enumerate(lines): - guid = "%s-%s" % (set_type, i) + guid = f"{set_type}-{i}" text_a = line[text_index] label = None if test_mode else line[1] examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) @@ -362,7 +362,7 @@ class Sst2Processor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, i) + guid = f"{set_type}-{i}" text_a = line[text_index] label = None if set_type == "test" else line[1] examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) @@ -407,7 +407,7 @@ class StsbProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" text_a = line[7] text_b = line[8] label = None if set_type == "test" else line[-1] @@ -456,7 +456,7 @@ class QqpProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" try: text_a = line[q1_index] text_b = line[q2_index] @@ -505,7 
+505,7 @@ class QnliProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" text_a = line[1] text_b = line[2] label = None if set_type == "test" else line[-1] @@ -551,7 +551,7 @@ class RteProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" text_a = line[1] text_b = line[2] label = None if set_type == "test" else line[-1] @@ -597,7 +597,7 @@ class WnliProcessor(DataProcessor): for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % (set_type, line[0]) + guid = f"{set_type}-{line[0]}" text_a = line[1] text_b = line[2] label = None if set_type == "test" else line[-1] diff --git a/src/transformers/data/processors/squad.py b/src/transformers/data/processors/squad.py index c1815c1f9c..54134bfa45 100644 --- a/src/transformers/data/processors/squad.py +++ b/src/transformers/data/processors/squad.py @@ -115,7 +115,7 @@ def squad_convert_example_to_features( actual_text = " ".join(example.doc_tokens[start_position : (end_position + 1)]) cleaned_answer_text = " ".join(whitespace_tokenize(example.answer_text)) if actual_text.find(cleaned_answer_text) == -1: - logger.warning("Could not find answer: '%s' vs. '%s'", actual_text, cleaned_answer_text) + logger.warning(f"Could not find answer: '{actual_text}' vs. 
'{cleaned_answer_text}'") return [] tok_to_orig_index = [] diff --git a/src/transformers/data/processors/utils.py b/src/transformers/data/processors/utils.py index 0fb3f40b9c..06db91f7e2 100644 --- a/src/transformers/data/processors/utils.py +++ b/src/transformers/data/processors/utils.py @@ -186,7 +186,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): if column_id is not None: ids.append(line[column_id]) else: - guid = "%s-%s" % (split_name, i) if split_name else "%s" % i + guid = f"{split_name}-{i}" if split_name else str(i) ids.append(guid) return self.add_examples( @@ -265,7 +265,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): all_input_ids = [] for (ex_index, example) in enumerate(self.examples): if ex_index % 10000 == 0: - logger.info("Tokenizing example %d", ex_index) + logger.info(f"Tokenizing example {ex_index}") input_ids = tokenizer.encode( example.text_a, @@ -279,7 +279,7 @@ class SingleSentenceClassificationProcessor(DataProcessor): features = [] for (ex_index, (input_ids, example)) in enumerate(zip(all_input_ids, self.examples)): if ex_index % 10000 == 0: - logger.info("Writing example %d/%d" % (ex_index, len(self.examples))) + logger.info(f"Writing example {ex_index}/{len(self.examples)}") # The mask has 1 for real tokens and 0 for padding tokens. Only real # tokens are attended to. 
attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) @@ -293,12 +293,10 @@ class SingleSentenceClassificationProcessor(DataProcessor): input_ids = input_ids + ([pad_token] * padding_length) attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) - assert len(input_ids) == batch_length, "Error with input length {} vs {}".format( - len(input_ids), batch_length - ) - assert len(attention_mask) == batch_length, "Error with input length {} vs {}".format( - len(attention_mask), batch_length - ) + assert len(input_ids) == batch_length, f"Error with input length {len(input_ids)} vs {batch_length}" + assert ( + len(attention_mask) == batch_length + ), f"Error with input length {len(attention_mask)} vs {batch_length}" if self.mode == "classification": label = label_map[example.label] @@ -309,10 +307,10 @@ class SingleSentenceClassificationProcessor(DataProcessor): if ex_index < 5 and self.verbose: logger.info("*** Example ***") - logger.info("guid: %s" % (example.guid)) - logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - logger.info("attention_mask: %s" % " ".join([str(x) for x in attention_mask])) - logger.info("label: %s (id = %d)" % (example.label, label)) + logger.info(f"guid: {example.guid}") + logger.info(f"input_ids: {' '.join([str(x) for x in input_ids])}") + logger.info(f"attention_mask: {' '.join([str(x) for x in attention_mask])}") + logger.info(f"label: {example.label} (id = {label})") features.append(InputFeatures(input_ids=input_ids, attention_mask=attention_mask, label=label)) diff --git a/src/transformers/data/processors/xnli.py b/src/transformers/data/processors/xnli.py index c77442480f..590131f981 100644 --- a/src/transformers/data/processors/xnli.py +++ b/src/transformers/data/processors/xnli.py @@ -38,12 +38,12 @@ class XnliProcessor(DataProcessor): def get_train_examples(self, data_dir): """See base class.""" lg = self.language if self.train_language is None else self.train_language 
- lines = self._read_tsv(os.path.join(data_dir, "XNLI-MT-1.0/multinli/multinli.train.{}.tsv".format(lg))) + lines = self._read_tsv(os.path.join(data_dir, f"XNLI-MT-1.0/multinli/multinli.train.{lg}.tsv")) examples = [] for (i, line) in enumerate(lines): if i == 0: continue - guid = "%s-%s" % ("train", i) + guid = f"train-{i}" text_a = line[0] text_b = line[1] label = "contradiction" if line[2] == "contradictory" else line[2] @@ -63,7 +63,7 @@ class XnliProcessor(DataProcessor): language = line[0] if language != self.language: continue - guid = "%s-%s" % ("test", i) + guid = f"test-{i}" text_a = line[6] text_b = line[7] label = line[1] diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index e795501ad3..597435fad2 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -583,8 +583,8 @@ def add_start_docstrings(*docstr): def add_start_docstrings_to_model_forward(*docstr): def docstring_decorator(fn): - class_name = ":class:`~transformers.{}`".format(fn.__qualname__.split(".")[0]) - intro = " The {} forward method, overrides the :func:`__call__` special method.".format(class_name) + class_name = f":class:`~transformers.{fn.__qualname__.split('.')[0]}`" + intro = f" The {class_name} forward method, overrides the :func:`__call__` special method." note = r""" .. 
note:: @@ -1048,11 +1048,11 @@ def filename_to_url(filename, cache_dir=None): cache_path = os.path.join(cache_dir, filename) if not os.path.exists(cache_path): - raise EnvironmentError("file {} not found".format(cache_path)) + raise EnvironmentError(f"file {cache_path} not found") meta_path = cache_path + ".json" if not os.path.exists(meta_path): - raise EnvironmentError("file {} not found".format(meta_path)) + raise EnvironmentError(f"file {meta_path} not found") with open(meta_path, encoding="utf-8") as meta_file: metadata = json.load(meta_file) @@ -1158,10 +1158,10 @@ def cached_path( output_path = url_or_filename elif urlparse(url_or_filename).scheme == "": # File, but it doesn't exist. - raise EnvironmentError("file {} not found".format(url_or_filename)) + raise EnvironmentError(f"file {url_or_filename} not found") else: # Something unknown - raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename)) + raise ValueError(f"unable to parse {url_or_filename} as a URL or as a local path") if extract_compressed_file: if not is_zipfile(output_path) and not tarfile.is_tarfile(output_path): @@ -1190,7 +1190,7 @@ def cached_path( tar_file.extractall(output_path_extracted) tar_file.close() else: - raise EnvironmentError("Archive format of {} could not be identified".format(output_path)) + raise EnvironmentError(f"Archive format of {output_path} could not be identified") return output_path_extracted @@ -1252,7 +1252,7 @@ def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers """ headers = copy.deepcopy(headers) if resume_size > 0: - headers["Range"] = "bytes=%d-" % (resume_size,) + headers["Range"] = f"bytes={resume_size}-" r = requests.get(url, stream=True, proxies=proxies, headers=headers) r.raise_for_status() content_length = r.headers.get("Content-Length") @@ -1302,12 +1302,12 @@ def get_from_cache( headers = {"user-agent": http_user_agent(user_agent)} if isinstance(use_auth_token, str): - 
headers["authorization"] = "Bearer {}".format(use_auth_token) + headers["authorization"] = f"Bearer {use_auth_token}" elif use_auth_token: token = HfFolder.get_token() if token is None: raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.") - headers["authorization"] = "Bearer {}".format(token) + headers["authorization"] = f"Bearer {token}" url_to_download = url etag = None @@ -1404,14 +1404,14 @@ def get_from_cache( # Download to temporary file, then copy to cache dir once finished. # Otherwise you get corrupt cache entries if the download gets interrupted. with temp_file_manager() as temp_file: - logger.info("%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name) + logger.info(f"{url} not found in cache or force_download set to True, downloading to {temp_file.name}") http_get(url_to_download, temp_file, proxies=proxies, resume_size=resume_size, headers=headers) - logger.info("storing %s in cache at %s", url, cache_path) + logger.info(f"storing {url} in cache at {cache_path}") os.replace(temp_file.name, cache_path) - logger.info("creating metadata file for %s", cache_path) + logger.info(f"creating metadata file for {cache_path}") meta = {"url": url, "etag": etag} meta_path = cache_path + ".json" with open(meta_path, "w") as meta_file: @@ -1625,8 +1625,7 @@ class ExplicitEnum(Enum): @classmethod def _missing_(cls, value): raise ValueError( - "%r is not a valid %s, please select one of %s" - % (value, cls.__name__, str(list(cls._value2member_map_.keys()))) + f"{value} is not a valid {cls.__name__}, please select one of {list(cls._value2member_map_.keys())}" ) diff --git a/src/transformers/generation_beam_search.py b/src/transformers/generation_beam_search.py index a2e2cb4753..063bda641f 100644 --- a/src/transformers/generation_beam_search.py +++ b/src/transformers/generation_beam_search.py @@ -218,7 +218,7 @@ class BeamSearchScorer(BeamScorer): if self._done[batch_idx]: assert ( 
len(beam_hyp) >= self.num_beams - ), "Batch can only be done if at least {} beams have been generated".format(self.num_beams) + ), f"Batch can only be done if at least {self.num_beams} beams have been generated" assert ( eos_token_id is not None and pad_token_id is not None ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined" diff --git a/src/transformers/generation_logits_process.py b/src/transformers/generation_logits_process.py index 5b4286db81..e40ca17116 100644 --- a/src/transformers/generation_logits_process.py +++ b/src/transformers/generation_logits_process.py @@ -371,9 +371,7 @@ class NoBadWordsLogitsProcessor(LogitsProcessor): self.bad_words_ids = list(filter(lambda bad_token_seq: bad_token_seq != [eos_token_id], bad_words_ids)) for banned_token_seq in self.bad_words_ids: - assert len(banned_token_seq) > 0, "Banned words token sequences {} cannot have an empty list".format( - bad_words_ids - ) + assert len(banned_token_seq) > 0, f"Banned words token sequences {bad_words_ids} cannot have an empty list" def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: banned_tokens = self._calc_banned_bad_words_ids(input_ids) diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py index 84a7880d0d..8c0802e952 100644 --- a/src/transformers/generation_tf_utils.py +++ b/src/transformers/generation_tf_utils.py @@ -159,7 +159,7 @@ class TFGenerationMixin: tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer model = TFAutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from huggingface.co and cache. 
outputs = model.generate(max_length=40) # do greedy decoding - print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + print(f'Generated: {tokenizer.decode(outputs[0], skip_special_tokens=True)}') tokenizer = AutoTokenizer.from_pretrained('openai-gpt') # Initialize tokenizer model = TFAutoModelWithLMHead.from_pretrained('openai-gpt') # Download model and configuration from huggingface.co and cache. @@ -167,7 +167,7 @@ class TFGenerationMixin: input_ids = tokenizer.encode(input_context, return_tensors='tf') # encode input context outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3, temperature=1.5) # generate 3 independent sequences using beam search decoding (5 beams) with sampling from initial context 'The dog' for i in range(3): # 3 output sequences were generated - print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True))) + print(f'Generated {i}: {tokenizer.decode(outputs[i], skip_special_tokens=True)}') tokenizer = AutoTokenizer.from_pretrained('distilgpt2') # Initialize tokenizer model = TFAutoModelWithLMHead.from_pretrained('distilgpt2') # Download model and configuration from huggingface.co and cache. @@ -175,14 +175,14 @@ class TFGenerationMixin: input_ids = tokenizer.encode(input_context, return_tensors='tf') # encode input context outputs = model.generate(input_ids=input_ids, max_length=40, temperature=0.7, num_return_sequences=3, do_sample=True) # generate 3 candidates using sampling for i in range(3): # 3 output sequences were generated - print('Generated {}: {}'.format(i, tokenizer.decode(outputs[i], skip_special_tokens=True))) + print(f'Generated {i}: {tokenizer.decode(outputs[i], skip_special_tokens=True)}') tokenizer = AutoTokenizer.from_pretrained('ctrl') # Initialize tokenizer model = TFAutoModelWithLMHead.from_pretrained('ctrl') # Download model and configuration from huggingface.co and cache. 
input_context = 'Legal My neighbor is' # "Legal" is one of the control codes for ctrl input_ids = tokenizer.encode(input_context, return_tensors='tf') # encode input context outputs = model.generate(input_ids=input_ids, max_length=50, temperature=0.7, repetition_penalty=1.2) # generate sequences - print('Generated: {}'.format(tokenizer.decode(outputs[0], skip_special_tokens=True))) + print(f'Generated: {tokenizer.decode(outputs[0], skip_special_tokens=True)}') tokenizer = AutoTokenizer.from_pretrained('gpt2') # Initialize tokenizer model = TFAutoModelWithLMHead.from_pretrained('gpt2') # Download model and configuration from huggingface.co and cache. @@ -291,9 +291,7 @@ class TFGenerationMixin: attention_mask = tf.ones_like(input_ids) if pad_token_id is None and eos_token_id is not None: - logger.warning( - "Setting `pad_token_id` to {} (first `eos_token_id`) to generate sequence".format(eos_token_id) - ) + logger.warning(f"Setting `pad_token_id` to {eos_token_id} (first `eos_token_id`) to generate sequence") pad_token_id = eos_token_id # current position and vocab size @@ -315,8 +313,8 @@ class TFGenerationMixin: assert ( decoder_start_token_id is not None ), "decoder_start_token_id or bos_token_id has to be defined for encoder-decoder generation" - assert hasattr(self, "get_encoder"), "{} should have a 'get_encoder' function defined".format(self) - assert callable(self.get_encoder), "{} should be a method".format(self.get_encoder) + assert hasattr(self, "get_encoder"), f"{self} should have a 'get_encoder' function defined" + assert callable(self.get_encoder), f"{self.get_encoder} should be a method" # get encoder and store encoder outputs encoder = self.get_encoder() @@ -763,7 +761,7 @@ class TFGenerationMixin: if done[batch_idx]: assert ( len(generated_hyps[batch_idx]) >= num_beams - ), "Batch can only be done if at least {} beams have been generated".format(num_beams) + ), f"Batch can only be done if at least {num_beams} beams have been generated." 
assert ( eos_token_id is not None and pad_token_id is not None ), "generated beams >= num_beams -> eos_token_id and pad_token have to be defined" @@ -843,12 +841,14 @@ class TFGenerationMixin: if eos_token_id is not None and all( (token_id % vocab_size).numpy().item() != eos_token_id for token_id in next_tokens[batch_idx] ): - assert tf.reduce_all( + if not tf.reduce_all( next_scores[batch_idx, :num_beams] == tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx] - ), "If batch_idx is not done, final next scores: {} have to equal to accumulated beam_scores: {}".format( - next_scores[:, :num_beams][batch_idx], tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx] - ) - + ): + raise ValueError( + f"If batch_idx is not done, final next scores: {next_scores[:, :num_beams][batch_idx]} have " + "to equal to accumulated beam_scores: " + f"{tf.reshape(beam_scores, (batch_size, num_beams))[batch_idx]}" + ) # need to add best num_beams hypotheses to generated hyps for beam_id in range(num_beams): effective_beam_id = batch_idx * num_beams + beam_id @@ -871,9 +871,9 @@ class TFGenerationMixin: best_hyp = sorted_hyps.pop()[1] sent_lengths_list.append(len(best_hyp)) best.append(best_hyp) - assert output_batch_size == len(best), "Output batch size {} must match output beam hypotheses {}".format( - output_batch_size, len(best) - ) + assert output_batch_size == len( + best + ), f"Output batch size {output_batch_size} must match output beam hypotheses {len(best)}" sent_lengths = tf.convert_to_tensor(sent_lengths_list, dtype=tf.int32) @@ -992,9 +992,9 @@ def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids): banned_tokens_slice = [] for banned_token_seq in bad_words_ids: - assert len(banned_token_seq) > 0, "Banned words token sequences {} cannot have an empty list".format( - bad_words_ids - ) + assert ( + len(banned_token_seq) > 0 + ), f"Banned words token sequences { bad_words_ids} cannot have an empty list" if _tokens_match(prev_input_ids_slice.numpy().tolist(), 
banned_token_seq[:-1]) is False: # if tokens do not match continue diff --git a/src/transformers/hf_api.py b/src/transformers/hf_api.py index dfee5f8800..26a6d208af 100644 --- a/src/transformers/hf_api.py +++ b/src/transformers/hf_api.py @@ -83,7 +83,7 @@ class HfApi: Throws: requests.exceptions.HTTPError if credentials are invalid """ - path = "{}/api/login".format(self.endpoint) + path = f"{self.endpoint}/api/login" r = requests.post(path, json={"username": username, "password": password}) r.raise_for_status() d = r.json() @@ -93,8 +93,8 @@ class HfApi: """ Call HF API to know "whoami" """ - path = "{}/api/whoami".format(self.endpoint) - r = requests.get(path, headers={"authorization": "Bearer {}".format(token)}) + path = f"{self.endpoint}/api/whoami" + r = requests.get(path, headers={"authorization": f"Bearer {token}"}) r.raise_for_status() d = r.json() return d["user"], d["orgs"] @@ -103,15 +103,15 @@ class HfApi: """ Call HF API to log out. """ - path = "{}/api/logout".format(self.endpoint) - r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}) + path = f"{self.endpoint}/api/logout" + r = requests.post(path, headers={"authorization": f"Bearer {token}"}) r.raise_for_status() def model_list(self) -> List[ModelInfo]: """ Get the public list of all the models on huggingface.co """ - path = "{}/api/models".format(self.endpoint) + path = f"{self.endpoint}/api/models" r = requests.get(path) r.raise_for_status() d = r.json() @@ -123,9 +123,9 @@ class HfApi: Call HF API to list all stored files for user (or one of their organizations). 
""" - path = "{}/api/repos/ls".format(self.endpoint) + path = f"{self.endpoint}/api/repos/ls" params = {"organization": organization} if organization is not None else None - r = requests.get(path, params=params, headers={"authorization": "Bearer {}".format(token)}) + r = requests.get(path, params=params, headers={"authorization": f"Bearer {token}"}) r.raise_for_status() d = r.json() return [RepoObj(**x) for x in d] @@ -151,13 +151,13 @@ class HfApi: lfsmultipartthresh: Optional: internal param for testing purposes. """ - path = "{}/api/repos/create".format(self.endpoint) + path = f"{self.endpoint}/api/repos/create" json = {"name": name, "organization": organization, "private": private} if lfsmultipartthresh is not None: json["lfsmultipartthresh"] = lfsmultipartthresh r = requests.post( path, - headers={"authorization": "Bearer {}".format(token)}, + headers={"authorization": f"Bearer {token}"}, json=json, ) if exist_ok and r.status_code == 409: @@ -174,10 +174,10 @@ class HfApi: CAUTION(this is irreversible). 
""" - path = "{}/api/repos/delete".format(self.endpoint) + path = f"{self.endpoint}/api/repos/delete" r = requests.delete( path, - headers={"authorization": "Bearer {}".format(token)}, + headers={"authorization": f"Bearer {token}"}, json={"name": name, "organization": organization}, ) r.raise_for_status() diff --git a/src/transformers/hf_argparser.py b/src/transformers/hf_argparser.py index cb0a5675fa..4326a589d6 100644 --- a/src/transformers/hf_argparser.py +++ b/src/transformers/hf_argparser.py @@ -123,7 +123,7 @@ class HfArgumentParser(ArgumentParser): kwargs["type"] = field.type.__args__[0] assert all( x == kwargs["type"] for x in field.type.__args__ - ), "{} cannot be a List of mixed types".format(field.name) + ), f"{field.name} cannot be a List of mixed types" if field.default_factory is not dataclasses.MISSING: kwargs["default"] = field.default_factory() elif field.default is dataclasses.MISSING: diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index cdde91021b..57336f8fe7 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -533,12 +533,9 @@ class TensorBoardCallback(TrainerCallback): else: logger.warning( "Trainer is attempting to log a value of " - '"%s" of type %s for key "%s" as a scalar. ' + f'"{v}" of type {type(v)} for key "{k}" as a scalar. ' "This invocation of Tensorboard's writer.add_scalar() " - "is incorrect so we dropped this attribute.", - v, - type(v), - k, + "is incorrect so we dropped this attribute." 
) self.tb_writer.flush() diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index d5063eacf6..38316de881 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -65,7 +65,7 @@ class ModelCard: try: setattr(self, key, value) except AttributeError as err: - logger.error("Can't set {} with value {} for {}".format(key, value, self)) + logger.error(f"Can't set {key} with value {value} for {self}") raise err def save_pretrained(self, save_directory_or_file): @@ -77,7 +77,7 @@ class ModelCard: output_model_card_file = save_directory_or_file self.to_json_file(output_model_card_file) - logger.info("Model card saved in {}".format(output_model_card_file)) + logger.info(f"Model card saved in {output_model_card_file}") @classmethod def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): @@ -161,11 +161,9 @@ class ModelCard: model_card_file, cache_dir=cache_dir, proxies=proxies, user_agent=user_agent ) if resolved_model_card_file == model_card_file: - logger.info("loading model card file {}".format(model_card_file)) + logger.info(f"loading model card file {model_card_file}") else: - logger.info( - "loading model card file {} from cache at {}".format(model_card_file, resolved_model_card_file) - ) + logger.info(f"loading model card file {model_card_file} from cache at {resolved_model_card_file}") # Load model card modelcard = cls.from_json_file(resolved_model_card_file) @@ -182,7 +180,7 @@ class ModelCard: for key in to_remove: kwargs.pop(key, None) - logger.info("Model card: %s", str(modelcard)) + logger.info(f"Model card: {modelcard}") if return_unused_kwargs: return modelcard, kwargs else: diff --git a/src/transformers/modeling_flax_pytorch_utils.py b/src/transformers/modeling_flax_pytorch_utils.py index 31001b88ee..f1bc431c6c 100644 --- a/src/transformers/modeling_flax_pytorch_utils.py +++ b/src/transformers/modeling_flax_pytorch_utils.py @@ -43,10 +43,10 @@ def load_pytorch_checkpoint_in_flax_state_dict(flax_model, 
pytorch_checkpoint_pa raise pt_path = os.path.abspath(pytorch_checkpoint_path) - logger.info("Loading PyTorch weights from {}".format(pt_path)) + logger.info(f"Loading PyTorch weights from {pt_path}") pt_state_dict = torch.load(pt_path, map_location="cpu") - logger.info("PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values())} parameters.") + logger.info(f"PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values()):,} parameters.") flax_state_dict = convert_pytorch_state_dict_to_flax(pt_state_dict, flax_model) diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 8815f700a2..c425f1a000 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -270,10 +270,8 @@ class FlaxPreTrainedModel(ABC): archive_file = os.path.join(pretrained_model_name_or_path, FLAX_WEIGHTS_NAME) else: raise EnvironmentError( - "Error no file named {} found in directory {} or `from_pt` set to False".format( - [FLAX_WEIGHTS_NAME, WEIGHTS_NAME], - pretrained_model_name_or_path, - ) + f"Error no file named {[FLAX_WEIGHTS_NAME, WEIGHTS_NAME]} found in directory " + f"{pretrained_model_name_or_path} or `from_pt` set to False" ) elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path @@ -382,7 +380,7 @@ class FlaxPreTrainedModel(ABC): Directory to which to save. Will be created if it doesn't exist. 
""" if os.path.isfile(save_directory): - logger.error("Provided path ({}) should be a directory, not a file".format(save_directory)) + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") return os.makedirs(save_directory, exist_ok=True) diff --git a/src/transformers/modeling_tf_pytorch_utils.py b/src/transformers/modeling_tf_pytorch_utils.py index 22a0f1a882..3a36b41f9e 100644 --- a/src/transformers/modeling_tf_pytorch_utils.py +++ b/src/transformers/modeling_tf_pytorch_utils.py @@ -98,10 +98,10 @@ def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path, tf_i raise pt_path = os.path.abspath(pytorch_checkpoint_path) - logger.info("Loading PyTorch weights from {}".format(pt_path)) + logger.info(f"Loading PyTorch weights from {pt_path}") pt_state_dict = torch.load(pt_path, map_location="cpu") - logger.info("PyTorch checkpoint contains {:,} parameters".format(sum(t.numel() for t in pt_state_dict.values()))) + logger.info(f"PyTorch checkpoint contains {sum(t.numel() for t in pt_state_dict.values()):,} parameters") return load_pytorch_weights_in_tf2_model( tf_model, pt_state_dict, tf_inputs=tf_inputs, allow_missing_keys=allow_missing_keys @@ -178,7 +178,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a if any(re.search(pat, name) is not None for pat in tf_model._keys_to_ignore_on_load_missing): continue - raise AttributeError("{} not found in PyTorch model".format(name)) + raise AttributeError(f"{name} not found in PyTorch model") array = pt_state_dict[name].numpy() @@ -204,7 +204,7 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a raise e tf_loaded_numel += array.size - # logger.warning("Initialize TF weight {}".format(symbolic_weight.name)) + # logger.warning(f"Initialize TF weight {symbolic_weight.name}") weight_value_tuples.append((symbolic_weight, array)) all_pytorch_weights.discard(name) @@ -214,7 +214,7 @@ def 
load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, a if tf_inputs is not None: tf_model(tf_inputs, training=False) # Make sure restore ops are run - logger.info("Loaded {:,} parameters in the TF 2.0 model.".format(tf_loaded_numel)) + logger.info(f"Loaded {tf_loaded_numel:,} parameters in the TF 2.0 model.") unexpected_keys = list(all_pytorch_weights) @@ -276,7 +276,7 @@ def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, tf_inputs from .modeling_tf_utils import load_tf_weights - logger.info("Loading TensorFlow weights from {}".format(tf_checkpoint_path)) + logger.info(f"Loading TensorFlow weights from {tf_checkpoint_path}") # Instantiate and load the associated TF 2.0 model tf_model_class_name = "TF" + pt_model.__class__.__name__ # Add "TF" at the beginning @@ -346,7 +346,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F missing_keys_pt.append(pt_weight_name) continue - raise AttributeError("{} not found in TF 2.0 model".format(pt_weight_name)) + raise AttributeError(f"{pt_weight_name} not found in TF 2.0 model") array, transpose = tf_weights_map[pt_weight_name] @@ -371,7 +371,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F e.args += (pt_weight.shape, array.shape) raise e - # logger.warning("Initialize PyTorch weight {}".format(pt_weight_name)) + # logger.warning(f"Initialize PyTorch weight {pt_weight_name}") new_pt_params_dict[pt_weight_name] = torch.from_numpy(array) loaded_pt_weights_data_ptr[pt_weight.data_ptr()] = torch.from_numpy(array) @@ -404,6 +404,6 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=F f"you can already use {pt_model.__class__.__name__} for predictions without further training." 
) - logger.info("Weights or buffers not loaded from TF 2.0 model: {}".format(all_tf_weights)) + logger.info(f"Weights or buffers not loaded from TF 2.0 model: {all_tf_weights}") return pt_model diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index cf71b25a1a..36e2b403b4 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -632,11 +632,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): super().__init__(*inputs, **kwargs) if not isinstance(config, PretrainedConfig): raise ValueError( - "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " - "To create a model from a pretrained model use " - "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( - self.__class__.__name__, self.__class__.__name__ - ) + f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class " + "`PretrainedConfig`. To create a model from a pretrained model use " + f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`" ) # Save config and origin of the pretrained weights if given in model self.config = config @@ -1027,7 +1025,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): https://www.tensorflow.org/tfx/serving/serving_basic """ if os.path.isfile(save_directory): - logger.error("Provided path ({}) should be a directory, not a file".format(save_directory)) + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") return os.makedirs(save_directory, exist_ok=True) @@ -1042,7 +1040,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): # If we save using the predefined names, we can load using `from_pretrained` output_model_file = os.path.join(save_directory, TF2_WEIGHTS_NAME) self.save_weights(output_model_file) - logger.info("Model weights saved in {}".format(output_model_file)) + logger.info(f"Model 
weights saved in {output_model_file}") @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): @@ -1207,9 +1205,8 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): archive_file = os.path.join(pretrained_model_name_or_path, TF2_WEIGHTS_NAME) else: raise EnvironmentError( - "Error no file named {} found in directory {} or `from_pt` set to False".format( - [WEIGHTS_NAME, TF2_WEIGHTS_NAME], pretrained_model_name_or_path - ) + f"Error no file named {[WEIGHTS_NAME, TF2_WEIGHTS_NAME]} found in directory " + f"{pretrained_model_name_or_path} or `from_pt` set to False" ) elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path @@ -1244,9 +1241,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): ) raise EnvironmentError(msg) if resolved_archive_file == archive_file: - logger.info("loading weights file {}".format(archive_file)) + logger.info(f"loading weights file {archive_file}") else: - logger.info("loading weights file {} from cache at {}".format(archive_file, resolved_archive_file)) + logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}") else: resolved_archive_file = None @@ -1273,7 +1270,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin): else: model(model.dummy_inputs) # build the network with dummy inputs - assert os.path.isfile(resolved_archive_file), "Error retrieving file {}".format(resolved_archive_file) + assert os.path.isfile(resolved_archive_file), f"Error retrieving file {resolved_archive_file}" # 'by_name' allow us to do transfer learning by skipping/adding layers # see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1339-L1357 try: @@ -1442,7 +1439,7 @@ class TFSharedEmbeddings(tf.keras.layers.Layer): elif mode == "linear": 
return self._linear(inputs) else: - raise ValueError("mode {} is not valid.".format(mode)) + raise ValueError(f"mode {mode} is not valid.") def _embedding(self, input_ids): """Applies embedding based on inputs tensor.""" diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 3846f524a8..fdc2ea1dc7 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -211,9 +211,7 @@ class ModuleUtilsMixin: encoder_extended_attention_mask = (1.0 - encoder_extended_attention_mask) * -1e9 else: raise ValueError( - "{} not recognized. `dtype` should be set to either `torch.float32` or `torch.float16`".format( - self.dtype - ) + f"{self.dtype} not recognized. `dtype` should be set to either `torch.float32` or `torch.float16`" ) return encoder_extended_attention_mask @@ -266,9 +264,7 @@ class ModuleUtilsMixin: extended_attention_mask = attention_mask[:, None, None, :] else: raise ValueError( - "Wrong shape for input_ids (shape {}) or attention_mask (shape {})".format( - input_shape, attention_mask.shape - ) + f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})" ) # Since attention_mask is 1.0 for positions we want to attend and 0.0 for @@ -439,11 +435,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin): super().__init__() if not isinstance(config, PretrainedConfig): raise ValueError( - "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " - "To create a model from a pretrained model use " - "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( - self.__class__.__name__, self.__class__.__name__ - ) + f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class " + "`PretrainedConfig`. 
To create a model from a pretrained model use " + f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`" ) # Save config and origin of the pretrained weights if given in model self.config = config @@ -834,7 +828,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin): output_model_file = os.path.join(save_directory, WEIGHTS_NAME) save_function(state_dict, output_model_file) - logger.info("Model weights saved in {}".format(output_model_file)) + logger.info(f"Model weights saved in {output_model_file}") @classmethod def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs): @@ -1053,9 +1047,9 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin): raise EnvironmentError(msg) if resolved_archive_file == archive_file: - logger.info("loading weights file {}".format(archive_file)) + logger.info(f"loading weights file {archive_file}") else: - logger.info("loading weights file {} from cache at {}".format(archive_file, resolved_archive_file)) + logger.info(f"loading weights file {archive_file} from cache at {resolved_archive_file}") else: resolved_archive_file = None @@ -1185,11 +1179,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin): f"you can already use {model.__class__.__name__} for predictions without further training." 
) if len(error_msgs) > 0: - raise RuntimeError( - "Error(s) in loading state_dict for {}:\n\t{}".format( - model.__class__.__name__, "\n\t".join(error_msgs) - ) - ) + error_msg = "\n\t".join(error_msgs) + raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}") # make sure token embedding weights are still tied if needed model.tie_weights() @@ -1754,7 +1745,7 @@ def prune_layer( elif isinstance(layer, Conv1D): return prune_conv1d_layer(layer, index, dim=1 if dim is None else dim) else: - raise ValueError("Can't prune layer of class {}".format(layer.__class__)) + raise ValueError(f"Can't prune layer of class {layer.__class__}") def apply_chunking_to_forward( @@ -1793,7 +1784,7 @@ def apply_chunking_to_forward( return apply_chunking_to_forward(self.forward_chunk, self.chunk_size_lm_head, self.seq_len_dim, hidden_states) """ - assert len(input_tensors) > 0, "{} has to be a tuple/list of tensors".format(input_tensors) + assert len(input_tensors) > 0, f"{input_tensors} has to be a tuple/list of tensors" tensor_shape = input_tensors[0].shape[chunk_dim] assert all( input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors @@ -1801,18 +1792,18 @@ def apply_chunking_to_forward( # inspect.signature exist since python 3.5 and is a python method -> no problem with backward compatibility num_args_in_forward_chunk_fn = len(inspect.signature(forward_fn).parameters) - assert num_args_in_forward_chunk_fn == len( - input_tensors - ), "forward_chunk_fn expects {} arguments, but only {} input tensors are given".format( - num_args_in_forward_chunk_fn, len(input_tensors) - ) + if num_args_in_forward_chunk_fn != len(input_tensors): + raise ValueError( + f"forward_chunk_fn expects {num_args_in_forward_chunk_fn} arguments, but only {len(input_tensors)} input " + "tensors are given" + ) if chunk_size > 0: - assert ( - input_tensors[0].shape[chunk_dim] % chunk_size == 0 - ), "The dimension to be chunked {} has to be a multiple 
of the chunk size {}".format( - input_tensors[0].shape[chunk_dim], chunk_size - ) + if input_tensors[0].shape[chunk_dim] % chunk_size != 0: + raise ValueError( + f"The dimension to be chunked {input_tensors[0].shape[chunk_dim]} has to be a multiple of the chunk " + f"size {chunk_size}" + ) num_chunks = input_tensors[0].shape[chunk_dim] // chunk_size diff --git a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py index 10c018170f..ebfc81eb28 100644 --- a/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py @@ -29,14 +29,14 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path): # Initialise PyTorch model config = AlbertConfig.from_json_file(albert_config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = AlbertForPreTraining(config) # Load weights from tf checkpoint load_tf_weights_in_albert(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index 2e20923b7b..21da03fd7a 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -84,13 +84,13 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from 
TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -152,7 +152,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path): or "AdamWeightDecayOptimizer_1" in name or "global_step" in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue pointer = model @@ -174,7 +174,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -191,7 +191,7 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - print("Initialize PyTorch weight {} from {}".format(name, original_name)) + print(f"Initialize PyTorch weight {name} from {original_name}") pointer.data = torch.from_numpy(array) return model @@ -252,8 +252,8 @@ class AlbertAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 189867addc..64be5062c8 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py 
+++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -338,7 +338,7 @@ class TFAlbertLayerGroup(tf.keras.layers.Layer): super().__init__(**kwargs) self.albert_layers = [ - TFAlbertLayer(config, name="albert_layers_._{}".format(i)) for i in range(config.inner_group_num) + TFAlbertLayer(config, name=f"albert_layers_._{i}") for i in range(config.inner_group_num) ] def call( @@ -390,8 +390,7 @@ class TFAlbertTransformer(tf.keras.layers.Layer): name="embedding_hidden_mapping_in", ) self.albert_layer_groups = [ - TFAlbertLayerGroup(config, name="albert_layer_groups_._{}".format(i)) - for i in range(config.num_hidden_groups) + TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups) ] def call( diff --git a/src/transformers/models/albert/tokenization_albert.py b/src/transformers/models/albert/tokenization_albert.py index c51e30bb99..a271f86064 100644 --- a/src/transformers/models/albert/tokenization_albert.py +++ b/src/transformers/models/albert/tokenization_albert.py @@ -311,7 +311,7 @@ class AlbertTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/albert/tokenization_albert_fast.py b/src/transformers/models/albert/tokenization_albert_fast.py index 40b80f0142..1d6e82b12d 100644 --- a/src/transformers/models/albert/tokenization_albert_fast.py +++ b/src/transformers/models/albert/tokenization_albert_fast.py @@ -248,7 +248,7 @@ class AlbertTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> 
Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index ac5f473171..9636d7a5ef 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -310,9 +310,7 @@ class AutoConfig: config_class = CONFIG_MAPPING[model_type] return config_class(*args, **kwargs) raise ValueError( - "Unrecognized model identifier: {}. Should contain one of {}".format( - model_type, ", ".join(CONFIG_MAPPING.keys()) - ) + f"Unrecognized model identifier: {model_type}. Should contain one of {', '.join(CONFIG_MAPPING.keys())}" ) @classmethod @@ -404,7 +402,7 @@ class AutoConfig: return config_class.from_dict(config_dict, **kwargs) raise ValueError( - "Unrecognized model in {}. " + f"Unrecognized model in {pretrained_model_name_or_path}. 
" "Should have a `model_type` key in its config.json, or contain one of the following strings " - "in its name: {}".format(pretrained_model_name_or_path, ", ".join(CONFIG_MAPPING.keys())) + f"in its name: {', '.join(CONFIG_MAPPING.keys())}" ) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 22b895309e..600c8ece2d 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -798,10 +798,8 @@ class AutoModel: if type(config) in MODEL_MAPPING.keys(): return MODEL_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_MAPPING.keys())}." ) @classmethod @@ -841,10 +839,8 @@ class AutoModel: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_MAPPING.keys())}." 
) @@ -893,10 +889,8 @@ class AutoModelForPreTraining: if type(config) in MODEL_FOR_PRETRAINING_MAPPING.keys(): return MODEL_FOR_PRETRAINING_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys())}." ) @classmethod @@ -936,10 +930,8 @@ class AutoModelForPreTraining: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_PRETRAINING_MAPPING.keys())}." ) @@ -999,10 +991,8 @@ class AutoModelWithLMHead: if type(config) in MODEL_WITH_LM_HEAD_MAPPING.keys(): return MODEL_WITH_LM_HEAD_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys())}." 
) @classmethod @@ -1048,10 +1038,8 @@ class AutoModelWithLMHead: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_WITH_LM_HEAD_MAPPING.keys())}." ) @@ -1099,10 +1087,8 @@ class AutoModelForCausalLM: if type(config) in MODEL_FOR_CAUSAL_LM_MAPPING.keys(): return MODEL_FOR_CAUSAL_LM_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())}." ) @classmethod @@ -1142,10 +1128,8 @@ class AutoModelForCausalLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())}." 
) @@ -1193,10 +1177,8 @@ class AutoModelForMaskedLM: if type(config) in MODEL_FOR_MASKED_LM_MAPPING.keys(): return MODEL_FOR_MASKED_LM_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys())}." ) @classmethod @@ -1236,10 +1218,8 @@ class AutoModelForMaskedLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_MASKED_LM_MAPPING.keys())}." ) @@ -1288,12 +1268,8 @@ class AutoModelForSeq2SeqLM: if type(config) in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys(): return MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys())}." 
) @classmethod @@ -1333,12 +1309,8 @@ class AutoModelForSeq2SeqLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys())}." ) @@ -1387,12 +1359,8 @@ class AutoModelForSequenceClassification: if type(config) in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys(): return MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys())}." ) @classmethod @@ -1432,12 +1400,8 @@ class AutoModelForSequenceClassification: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys())}." 
) @@ -1485,12 +1449,8 @@ class AutoModelForQuestionAnswering: return MODEL_FOR_QUESTION_ANSWERING_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())}." ) @classmethod @@ -1531,12 +1491,8 @@ class AutoModelForQuestionAnswering: ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())}." ) @@ -1586,12 +1542,8 @@ class AutoModelForTableQuestionAnswering: return MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.keys())}." 
) @classmethod @@ -1632,12 +1584,8 @@ class AutoModelForTableQuestionAnswering: ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING.keys())}." ) @@ -1685,12 +1633,8 @@ class AutoModelForTokenClassification: return MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys())}." ) @classmethod @@ -1731,12 +1675,8 @@ class AutoModelForTokenClassification: ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys())}." 
) @@ -1786,12 +1726,8 @@ class AutoModelForMultipleChoice: return MODEL_FOR_MULTIPLE_CHOICE_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys())}." ) @classmethod @@ -1832,12 +1768,8 @@ class AutoModelForMultipleChoice: ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys())}." ) @@ -1887,12 +1819,8 @@ class AutoModelForNextSentencePrediction: return MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys())}." 
) @classmethod @@ -1933,10 +1861,6 @@ class AutoModelForNextSentencePrediction: ) raise ValueError( - "Unrecognized configuration class {} for this kind of AutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys())}." ) diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index ece15c0445..62df0925c7 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -590,10 +590,8 @@ class TFAutoModel(object): if type(config) in TF_MODEL_MAPPING.keys(): return TF_MODEL_MAPPING[type(config)](config, **kwargs) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_MAPPING.keys())}." ) @classmethod @@ -633,10 +631,8 @@ class TFAutoModel(object): pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_MAPPING.keys())}." 
) @@ -685,10 +681,8 @@ class TFAutoModelForPreTraining(object): if type(config) in TF_MODEL_FOR_PRETRAINING_MAPPING.keys(): return TF_MODEL_FOR_PRETRAINING_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_PRETRAINING_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_PRETRAINING_MAPPING.keys())}." ) @classmethod @@ -728,10 +722,8 @@ class TFAutoModelForPreTraining(object): pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_PRETRAINING_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_PRETRAINING_MAPPING.keys())}." ) @@ -791,10 +783,8 @@ class TFAutoModelWithLMHead(object): if type(config) in TF_MODEL_WITH_LM_HEAD_MAPPING.keys(): return TF_MODEL_WITH_LM_HEAD_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_WITH_LM_HEAD_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_WITH_LM_HEAD_MAPPING.keys())}." 
) @classmethod @@ -840,10 +830,8 @@ class TFAutoModelWithLMHead(object): pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_WITH_LM_HEAD_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_WITH_LM_HEAD_MAPPING.keys())}." ) @@ -891,10 +879,8 @@ class TFAutoModelForCausalLM: if type(config) in TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys(): return TF_MODEL_FOR_CAUSAL_LM_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys())}." ) @classmethod @@ -934,10 +920,8 @@ class TFAutoModelForCausalLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_CAUSAL_LM_MAPPING.keys())}." 
) @@ -985,10 +969,8 @@ class TFAutoModelForMaskedLM: if type(config) in TF_MODEL_FOR_MASKED_LM_MAPPING.keys(): return TF_MODEL_FOR_MASKED_LM_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_MASKED_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_MASKED_LM_MAPPING.keys())}." ) @classmethod @@ -1028,10 +1010,8 @@ class TFAutoModelForMaskedLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, cls.__name__, ", ".join(c.__name__ for c in TF_MODEL_FOR_MASKED_LM_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_MASKED_LM_MAPPING.keys())}." ) @@ -1080,12 +1060,8 @@ class TFAutoModelForSeq2SeqLM: if type(config) in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys(): return TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING[type(config)](config, **kwargs) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys())}." 
) @classmethod @@ -1125,12 +1101,8 @@ class TFAutoModelForSeq2SeqLM: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys())}." ) @@ -1179,12 +1151,8 @@ class TFAutoModelForSequenceClassification(object): if type(config) in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys(): return TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys())}." ) @classmethod @@ -1224,12 +1192,8 @@ class TFAutoModelForSequenceClassification(object): pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.keys())}." 
) @@ -1277,12 +1241,8 @@ class TFAutoModelForQuestionAnswering(object): if type(config) in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys(): return TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())}." ) @classmethod @@ -1322,12 +1282,8 @@ class TFAutoModelForQuestionAnswering(object): pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())}." 
) @@ -1374,12 +1330,8 @@ class TFAutoModelForTokenClassification: if type(config) in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys(): return TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys())}." ) @classmethod @@ -1419,12 +1371,8 @@ class TFAutoModelForTokenClassification: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys())}." ) @@ -1473,12 +1421,8 @@ class TFAutoModelForMultipleChoice: if type(config) in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys(): return TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys())}." 
) @classmethod @@ -1518,12 +1462,8 @@ class TFAutoModelForMultipleChoice: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.keys())}." ) @@ -1572,12 +1512,8 @@ class TFAutoModelForNextSentencePrediction: if type(config) in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys(): return TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING[type(config)](config) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys())}." ) @classmethod @@ -1617,10 +1553,6 @@ class TFAutoModelForNextSentencePrediction: pretrained_model_name_or_path, *model_args, config=config, **kwargs ) raise ValueError( - "Unrecognized configuration class {} for this kind of TFAutoModel: {}.\n" - "Model type should be one of {}.".format( - config.__class__, - cls.__name__, - ", ".join(c.__name__ for c in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys()), - ) + f"Unrecognized configuration class {config.__class__} for this kind of TFAutoModel: {cls.__name__}.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING.keys())}." 
) diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index 06985c129a..c4f28a43d0 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -402,7 +402,7 @@ class AutoTokenizer: if tokenizer_class is None: raise ValueError( - "Tokenizer class {} does not exist or is not currently imported.".format(tokenizer_class_candidate) + f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported." ) return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs) @@ -431,8 +431,6 @@ class AutoTokenizer: ) raise ValueError( - "Unrecognized configuration class {} to build an AutoTokenizer.\n" - "Model type should be one of {}.".format( - config.__class__, ", ".join(c.__name__ for c in TOKENIZER_MAPPING.keys()) - ) + f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n" + f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING.keys())}." 
) diff --git a/src/transformers/models/barthez/tokenization_barthez.py b/src/transformers/models/barthez/tokenization_barthez.py index f8061b323b..428f6fec65 100644 --- a/src/transformers/models/barthez/tokenization_barthez.py +++ b/src/transformers/models/barthez/tokenization_barthez.py @@ -256,7 +256,7 @@ class BarthezTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/barthez/tokenization_barthez_fast.py b/src/transformers/models/barthez/tokenization_barthez_fast.py index d61ac07446..1a9610c556 100644 --- a/src/transformers/models/barthez/tokenization_barthez_fast.py +++ b/src/transformers/models/barthez/tokenization_barthez_fast.py @@ -218,7 +218,7 @@ class BarthezTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py index c780c0f835..4eaffae3fa 100644 --- a/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py @@ 
-38,14 +38,14 @@ logger = logging.get_logger(__name__) def load_tf2_weights_in_bert(model, tf_checkpoint_path, config): tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] layer_depth = [] for full_name, shape in init_vars: - # logger.info("Loading TF weight {} with shape {}".format(name, shape)) + # logger.info(f"Loading TF weight {name} with shape {shape}") name = full_name.split("/") if full_name == "_CHECKPOINTABLE_OBJECT_GRAPH" or name[0] in ["global_step", "save_counter"]: logger.info(f"Skipping non-model layer {full_name}") diff --git a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py index d1cb69a2eb..19850bc431 100755 --- a/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py @@ -29,14 +29,14 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): # Initialise PyTorch model config = BertConfig.from_json_file(bert_config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = BertForPreTraining(config) # Load weights from tf checkpoint load_tf_weights_in_bert(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py 
index 07685f6450..a58240c8c3 100644 --- a/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py +++ b/src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py @@ -65,7 +65,7 @@ def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name def to_tf_var_name(name: str): for patt, repl in iter(var_map): name = name.replace(patt, repl) - return "bert/{}".format(name) + return f"bert/{name}" def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session): tf_dtype = tf.dtypes.as_dtype(tensor.dtype) @@ -84,7 +84,7 @@ def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, model_name tf_var = create_tf_var(tensor=torch_tensor, name=tf_name, session=session) tf.keras.backend.set_value(tf_var, torch_tensor) tf_weight = session.run(tf_var) - print("Successfully created {}: {}".format(tf_name, np.allclose(tf_weight, torch_tensor))) + print(f"Successfully created {tf_name}: {np.allclose(tf_weight, torch_tensor)}") saver = tf.train.Saver(tf.trainable_variables()) saver.save(session, os.path.join(ckpt_dir, model_name.replace("-", "_") + ".ckpt")) diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 88bb089bfb..370af8b47f 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -103,13 +103,13 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) 
arrays.append(array) @@ -122,7 +122,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path): n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue pointer = model for m_name in name: @@ -142,7 +142,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -158,7 +158,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model @@ -215,8 +215,8 @@ class BertSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index d45690fc01..988a6149a1 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -411,7 +411,7 @@ class TFBertEncoder(tf.keras.layers.Layer): def __init__(self, config: BertConfig, **kwargs): super().__init__(**kwargs) - self.layer = [TFBertLayer(config, name="layer_._{}".format(i)) for i in 
range(config.num_hidden_layers)] + self.layer = [TFBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/bert/tokenization_bert.py b/src/transformers/models/bert/tokenization_bert.py index 9f818f117b..8f3ecfabf6 100644 --- a/src/transformers/models/bert/tokenization_bert.py +++ b/src/transformers/models/bert/tokenization_bert.py @@ -192,8 +192,8 @@ class BertTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -343,8 +343,8 @@ class BertTokenizer(PreTrainedTokenizer): for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: vocabulary indices are not consecutive." - " Please check that the vocabulary is not corrupted!".format(vocab_file) + f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!" 
) index = token_index writer.write(token + "\n") diff --git a/src/transformers/models/bert_generation/modeling_bert_generation.py b/src/transformers/models/bert_generation/modeling_bert_generation.py index 1954e21e38..57ec9345b5 100755 --- a/src/transformers/models/bert_generation/modeling_bert_generation.py +++ b/src/transformers/models/bert_generation/modeling_bert_generation.py @@ -109,7 +109,7 @@ def load_tf_weights_in_bert_generation( array = np.asarray(sess.run(all_variables[key])) if not is_embedding: - logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, key)) + logger.info(f"Transposing numpy weight of shape {array.shape} for {key}") array = np.transpose(array) else: model_pointer = model_pointer.weight @@ -126,7 +126,7 @@ def load_tf_weights_in_bert_generation( model_pointer.data = torch.from_numpy(array.astype(np.float32)) keep_track_variables.pop(key, None) - logger.info("Weights not copied to PyTorch model: {}".format(", ".join(keep_track_variables.keys()))) + logger.info(f"Weights not copied to PyTorch model: {', '.join(keep_track_variables.keys())}") return model diff --git a/src/transformers/models/bert_generation/tokenization_bert_generation.py b/src/transformers/models/bert_generation/tokenization_bert_generation.py index 747a0b8f99..42b5fcac8e 100644 --- a/src/transformers/models/bert_generation/tokenization_bert_generation.py +++ b/src/transformers/models/bert_generation/tokenization_bert_generation.py @@ -134,7 +134,7 @@ class BertGenerationTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git 
a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py index ca605930d8..995c944c35 100644 --- a/src/transformers/models/bert_japanese/tokenization_bert_japanese.py +++ b/src/transformers/models/bert_japanese/tokenization_bert_japanese.py @@ -130,8 +130,8 @@ class BertJapaneseTokenizer(BertTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -151,7 +151,7 @@ class BertJapaneseTokenizer(BertTokenizer): do_lower_case=do_lower_case, never_split=never_split, **(mecab_kwargs or {}) ) else: - raise ValueError("Invalid word_tokenizer_type '{}' is specified.".format(word_tokenizer_type)) + raise ValueError(f"Invalid word_tokenizer_type '{word_tokenizer_type}' is specified.") self.do_subword_tokenize = do_subword_tokenize self.subword_tokenizer_type = subword_tokenizer_type @@ -161,7 +161,7 @@ class BertJapaneseTokenizer(BertTokenizer): elif subword_tokenizer_type == "character": self.subword_tokenizer = CharacterTokenizer(vocab=self.vocab, unk_token=self.unk_token) else: - raise ValueError("Invalid subword_tokenizer_type '{}' is specified.".format(subword_tokenizer_type)) + raise ValueError(f"Invalid subword_tokenizer_type '{subword_tokenizer_type}' is specified.") @property def do_lower_case(self): @@ -279,7 +279,7 @@ class MecabTokenizer: raise ValueError("Invalid mecab_dic is specified.") mecabrc = os.path.join(dic_dir, "mecabrc") - mecab_option = '-d "{}" -r "{}" 
'.format(dic_dir, mecabrc) + mecab_option + mecab_option = f'-d "{dic_dir}" -r "{mecabrc}" ' + mecab_option self.mecab = fugashi.GenericTagger(mecab_option) diff --git a/src/transformers/models/bertweet/tokenization_bertweet.py b/src/transformers/models/bertweet/tokenization_bertweet.py index c41e82b096..aaeffd7380 100644 --- a/src/transformers/models/bertweet/tokenization_bertweet.py +++ b/src/transformers/models/bertweet/tokenization_bertweet.py @@ -385,7 +385,7 @@ class BertweetTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -419,7 +419,7 @@ class BertweetTokenizer(PreTrainedTokenizer): except FileNotFoundError as fnfe: raise fnfe except UnicodeError: - raise Exception("Incorrect encoding detected in {}, please " "rebuild the dataset".format(f)) + raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset") return lines = f.readlines() diff --git a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py index 7cea701acd..2d400bb828 100644 --- a/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py @@ -27,7 +27,7 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, big_bird_config_file, pytorch_dump_path, is_trivia_qa): # Initialise PyTorch model config = BigBirdConfig.from_json_file(big_bird_config_file) - print("Building PyTorch model from 
configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") if is_trivia_qa: model = BigBirdForQuestionAnswering(config) diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index 63b61e1948..f7fd54b946 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -122,7 +122,7 @@ def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False): if i >= len(init_vars) - 2: name = name.replace("intermediate", "output") - logger.info("Loading TF weight {} with shape {}".format(name, var.shape)) + logger.info(f"Loading TF weight {name} with shape {var.shape}") array = var.value().numpy() names.append(name) tf_weights[name] = array @@ -141,7 +141,7 @@ def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.saved_model.load(tf_path).variables if is_trivia_qa else tf.train.list_variables(tf_path) @@ -304,8 +304,8 @@ class BigBirdSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads @@ -2171,9 +2171,8 @@ class BigBirdModel(BigBirdPreTrainedModel): padding_len = (block_size - seq_len % block_size) % block_size if padding_len > 0: logger.info( - "Input ids are automatically padded from {} to {} to be 
a multiple of `config.block_size`: {}".format( - seq_len, seq_len + padding_len, block_size - ) + f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of " + f"`config.block_size`: {block_size}" ) if input_ids is not None: input_ids = F.pad(input_ids, (0, padding_len), value=pad_token_id) diff --git a/src/transformers/models/big_bird/tokenization_big_bird.py b/src/transformers/models/big_bird/tokenization_big_bird.py index 650f02dea1..3cafcda189 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird.py +++ b/src/transformers/models/big_bird/tokenization_big_bird.py @@ -164,7 +164,7 @@ class BigBirdTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py index f69e14aa25..1af143f380 100644 --- a/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py @@ -208,7 +208,7 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ 
-226,8 +226,8 @@ class BlenderbotSmallTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" ) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/camembert/tokenization_camembert.py b/src/transformers/models/camembert/tokenization_camembert.py index 8901ee9a32..eb57acec89 100644 --- a/src/transformers/models/camembert/tokenization_camembert.py +++ b/src/transformers/models/camembert/tokenization_camembert.py @@ -256,7 +256,7 @@ class CamembertTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/camembert/tokenization_camembert_fast.py b/src/transformers/models/camembert/tokenization_camembert_fast.py index a93af73fd2..648da8be70 100644 --- a/src/transformers/models/camembert/tokenization_camembert_fast.py +++ b/src/transformers/models/camembert/tokenization_camembert_fast.py @@ -217,7 +217,7 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path 
({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index c31d08a56e..0ededdc83f 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -70,12 +70,12 @@ def load_tf_weights_in_convbert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) tf_data = {} for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) tf_data[name] = array @@ -285,8 +285,8 @@ class ConvBertSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) new_num_attention_heads = config.num_attention_heads // config.head_ratio diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index d5afa6363e..ddf33098b2 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -147,8 +147,8 @@ class TFConvBertSelfAttention(tf.keras.layers.Layer): if config.hidden_size % 
config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) new_num_attention_heads = int(config.num_attention_heads / config.head_ratio) @@ -442,7 +442,7 @@ class TFConvBertEncoder(tf.keras.layers.Layer): def __init__(self, config, **kwargs): super().__init__(**kwargs) - self.layer = [TFConvBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFConvBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index def747a46d..a4cf3f509c 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -234,7 +234,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): config.resid_pdrop, config.layer_norm_epsilon, self.output_attentions, - name="h_._{}".format(i), + name=f"h_._{i}", ) for i in range(config.n_layer) ] diff --git a/src/transformers/models/ctrl/tokenization_ctrl.py b/src/transformers/models/ctrl/tokenization_ctrl.py index 65df6bbab3..d1adb50087 100644 --- a/src/transformers/models/ctrl/tokenization_ctrl.py +++ b/src/transformers/models/ctrl/tokenization_ctrl.py @@ -226,7 +226,7 @@ class CTRLTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] 
@@ -244,8 +244,8 @@ class CTRLTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" ) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index 3d24b9e630..84989fda75 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -492,8 +492,8 @@ class DisentangledSelfAttention(torch.nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads self.attention_head_size = int(config.hidden_size / config.num_attention_heads) diff --git a/src/transformers/models/deberta/tokenization_deberta.py b/src/transformers/models/deberta/tokenization_deberta.py index 9e8c849740..ef90b52a3e 100644 --- a/src/transformers/models/deberta/tokenization_deberta.py +++ b/src/transformers/models/deberta/tokenization_deberta.py @@ -549,8 +549,8 @@ class DebertaTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. 
To load the vocabulary from a Google pretrained " - "model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.do_lower_case = do_lower_case self.gpt2_tokenizer = GPT2Tokenizer(vocab_file) diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py index 8002eeae52..da73997e1a 100644 --- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py @@ -561,8 +561,8 @@ class DisentangledSelfAttention(torch.nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads _attention_head_size = config.hidden_size // config.num_attention_heads diff --git a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py index c7edc10111..a0e80f6b00 100644 --- a/src/transformers/models/deberta_v2/tokenization_deberta_v2.py +++ b/src/transformers/models/deberta_v2/tokenization_deberta_v2.py @@ -107,8 +107,8 @@ class DebertaV2Tokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = DebertaV2Tokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.do_lower_case = do_lower_case self.split_by_punct = split_by_punct @@ -481,11 +481,11 @@ def convert_to_unicode(text): elif isinstance(text, bytes): return text.decode("utf-8", "ignore") else: - raise ValueError("Unsupported string type: %s" % (type(text))) + raise ValueError(f"Unsupported string type: {type(text)}") elif six.PY2: if isinstance(text, str): return text.decode("utf-8", "ignore") else: - raise ValueError("Unsupported string type: %s" % (type(text))) + raise ValueError(f"Unsupported string type: {type(text)}") else: raise ValueError("Not running on Python2 or Python 3?") diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index 65c0def694..911fba8088 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -159,7 +159,7 @@ class MultiHeadSelfAttention(nn.Module): """ bs, q_length, dim = query.size() k_length = key.size(1) - # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim) + # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured' # assert key.size() == value.size() dim_per_head = self.dim // self.n_heads @@ -208,9 +208,7 @@ class FFN(nn.Module): self.seq_len_dim = 1 self.lin1 = nn.Linear(in_features=config.dim, out_features=config.hidden_dim) self.lin2 = nn.Linear(in_features=config.hidden_dim, out_features=config.dim) - assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format( - config.activation - ) + assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']" self.activation = gelu if config.activation == "gelu" else nn.ReLU() def forward(self, input): diff --git 
a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 9299fdc752..8ec0060ab3 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -168,7 +168,7 @@ class TFMultiHeadSelfAttention(tf.keras.layers.Layer): """ bs, q_length, dim = shape_list(query) k_length = shape_list(key)[1] - # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim) + # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured' # assert key.size() == value.size() dim_per_head = tf.math.divide(self.dim, self.n_heads) dim_per_head = tf.cast(dim_per_head, dtype=tf.int32) @@ -221,9 +221,7 @@ class TFFFN(tf.keras.layers.Layer): self.lin2 = tf.keras.layers.Dense( config.dim, kernel_initializer=get_initializer(config.initializer_range), name="lin2" ) - assert config.activation in ["relu", "gelu"], "activation ({}) must be in ['relu', 'gelu']".format( - config.activation - ) + assert config.activation in ["relu", "gelu"], f"activation ({config.activation}) must be in ['relu', 'gelu']" self.activation = get_tf_activation(config.activation) def call(self, input, training=False): @@ -290,7 +288,7 @@ class TFTransformer(tf.keras.layers.Layer): self.output_hidden_states = config.output_hidden_states self.output_attentions = config.output_attentions - self.layer = [TFTransformerBlock(config, name="layer_._{}".format(i)) for i in range(config.n_layers)] + self.layer = [TFTransformerBlock(config, name=f"layer_._{i}") for i in range(config.n_layers)] def call(self, x, attn_mask, head_mask, output_attentions, output_hidden_states, return_dict, training=False): # docstyle-ignore diff --git a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py index cc10ac002f..c6484581b7 100644 --- 
a/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py +++ b/src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py @@ -28,7 +28,7 @@ CheckpointState = collections.namedtuple( def load_states_from_checkpoint(model_file: str) -> CheckpointState: - print("Reading saved model from %s", model_file) + print(f"Reading saved model from {model_file}") state_dict = torch.load(model_file, map_location=lambda s, l: default_restore_location(s, "cpu")) return CheckpointState(**state_dict) @@ -55,7 +55,7 @@ class DPRState: class DPRContextEncoderState(DPRState): def load_dpr_model(self): model = DPRContextEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0])) - print("Loading DPR biencoder from {}".format(self.src_file)) + print(f"Loading DPR biencoder from {self.src_file}") saved_state = load_states_from_checkpoint(self.src_file) encoder, prefix = model.ctx_encoder, "ctx_model." # Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3 @@ -73,7 +73,7 @@ class DPRContextEncoderState(DPRState): class DPRQuestionEncoderState(DPRState): def load_dpr_model(self): model = DPRQuestionEncoder(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0])) - print("Loading DPR biencoder from {}".format(self.src_file)) + print(f"Loading DPR biencoder from {self.src_file}") saved_state = load_states_from_checkpoint(self.src_file) encoder, prefix = model.question_encoder, "question_model." 
# Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3 @@ -91,7 +91,7 @@ class DPRQuestionEncoderState(DPRState): class DPRReaderState(DPRState): def load_dpr_model(self): model = DPRReader(DPRConfig(**BertConfig.get_config_dict("bert-base-uncased")[0])) - print("Loading DPR reader from {}".format(self.src_file)) + print(f"Loading DPR reader from {self.src_file}") saved_state = load_states_from_checkpoint(self.src_file) # Fix changes from https://github.com/huggingface/transformers/commit/614fef1691edb806de976756d4948ecbcd0c0ca3 state_dict = { diff --git a/src/transformers/models/dpr/tokenization_dpr.py b/src/transformers/models/dpr/tokenization_dpr.py index cedfe43d21..23bfff9062 100644 --- a/src/transformers/models/dpr/tokenization_dpr.py +++ b/src/transformers/models/dpr/tokenization_dpr.py @@ -239,7 +239,7 @@ class CustomDPRReaderTokenizerMixin: questions = questions if not isinstance(questions, str) else [questions] * n_passages assert len(titles) == len( texts - ), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts)) + ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts." 
encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"] encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"] encoded_inputs = { @@ -350,9 +350,9 @@ class CustomDPRReaderTokenizerMixin: scores = sorted(scores, key=lambda x: x[1], reverse=True) chosen_span_intervals = [] for (start_index, end_index), score in scores: - assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index) + assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]" length = end_index - start_index + 1 - assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length) + assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}" if any( [ start_index <= prev_start_index <= prev_end_index <= end_index diff --git a/src/transformers/models/dpr/tokenization_dpr_fast.py b/src/transformers/models/dpr/tokenization_dpr_fast.py index 90ab9c3f74..1f5a37be24 100644 --- a/src/transformers/models/dpr/tokenization_dpr_fast.py +++ b/src/transformers/models/dpr/tokenization_dpr_fast.py @@ -240,7 +240,7 @@ class CustomDPRReaderTokenizerMixin: questions = questions if not isinstance(questions, str) else [questions] * n_passages assert len(titles) == len( texts - ), "There should be as many titles than texts but got {} titles and {} texts.".format(len(titles), len(texts)) + ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts." 
encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"] encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"] encoded_inputs = { @@ -351,9 +351,9 @@ class CustomDPRReaderTokenizerMixin: scores = sorted(scores, key=lambda x: x[1], reverse=True) chosen_span_intervals = [] for (start_index, end_index), score in scores: - assert start_index <= end_index, "Wrong span indices: [{}:{}]".format(start_index, end_index) + assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]" length = end_index - start_index + 1 - assert length <= max_answer_length, "Span is too long: {} > {}".format(length, max_answer_length) + assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}" if any( [ start_index <= prev_start_index <= prev_end_index <= end_index diff --git a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py index 9cbfcf665d..0e8a5c5917 100644 --- a/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py @@ -29,7 +29,7 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, discriminator_or_generator): # Initialise PyTorch model config = ElectraConfig.from_json_file(config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") if discriminator_or_generator == "discriminator": model = ElectraForPreTraining(config) @@ -44,7 +44,7 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_du ) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch 
model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 59605bc428..913d269ad5 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -83,13 +83,13 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_ ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -112,7 +112,7 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_ # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v # which are not required for using pretrained model if any(n in ["global_step", "temperature"] for n in name): - logger.info("Skipping {}".format(original_name)) + logger.info(f"Skipping {original_name}") continue pointer = model for m_name in name: @@ -144,10 +144,10 @@ def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discriminator_ except AssertionError as e: e.args += (pointer.shape, array.shape) raise - print("Initialize PyTorch weight {}".format(name), original_name) + print(f"Initialize PyTorch weight {name}", original_name) pointer.data = torch.from_numpy(array) except AttributeError as e: - print("Skipping {}".format(original_name), name, e) + print(f"Skipping {original_name}", name, e) continue return model @@ -206,8 +206,8 @@ class ElectraSelfAttention(nn.Module): 
super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 8a4a77db54..2383df177a 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -285,7 +285,7 @@ class TFElectraEncoder(tf.keras.layers.Layer): def __init__(self, config: ElectraConfig, **kwargs): super().__init__(**kwargs) - self.layer = [TFElectraLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFElectraLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 2486521847..f314106677 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -159,9 +159,7 @@ class EncoderDecoderModel(PreTrainedModel): if config is None: config = EncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config) else: - assert isinstance(config, self.config_class), "config: {} has to be of type {}".format( - config, self.config_class - ) + assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}" # initialize with config super().__init__(config) diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py 
b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 646c5da050..da2f2d21c7 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -337,7 +337,7 @@ class TFFlaubertMultiHeadAttention(tf.keras.layers.Layer): else: klen = shape_list(kv)[1] - # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim) + # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured' dim_per_head = self.dim // self.n_heads mask_reshape = (bs, 1, qlen, klen) if len(shape_list(mask)) == 3 else (bs, 1, 1, klen) @@ -450,21 +450,19 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): for i in range(self.n_layers): self.attentions.append( - TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name="attentions_._{}".format(i)) + TFFlaubertMultiHeadAttention(self.n_heads, self.dim, config=config, name=f"attentions_._{i}") ) self.layer_norm1.append( - tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm1_._{}".format(i)) + tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm1_._{i}") ) # if self.is_decoder: # self.layer_norm15.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps)) # self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout)) self.ffns.append( - TFFlaubertTransformerFFN( - self.dim, self.hidden_dim, self.dim, config=config, name="ffns_._{}".format(i) - ) + TFFlaubertTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name=f"ffns_._{i}") ) self.layer_norm2.append( - tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm2_._{}".format(i)) + tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm2_._{i}") ) def build(self, input_shape): diff --git a/src/transformers/models/flaubert/tokenization_flaubert.py 
b/src/transformers/models/flaubert/tokenization_flaubert.py index 96dc7ad282..ee6c824612 100644 --- a/src/transformers/models/flaubert/tokenization_flaubert.py +++ b/src/transformers/models/flaubert/tokenization_flaubert.py @@ -71,7 +71,7 @@ def convert_to_unicode(text): elif isinstance(s, six.text_type): return s else: - raise TypeError("not expecting type '%s'" % type(s)) + raise TypeError(f"not expecting type '{type(s)}'") return six_ensure_text(text, encoding="utf-8", errors="ignore") diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index f644c6b43d..e1f37a5af5 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -357,7 +357,7 @@ def _make_linear_from_emb(emb): # Helper Functions, mostly for making masks def _check_shapes(shape_1, shape2): if shape_1 != shape2: - raise AssertionError("shape mismatch: {} != {}".format(shape_1, shape2)) + raise AssertionError(f"shape mismatch: {shape_1} != {shape2}") def shift_tokens_right(input_ids, pad_token_id): diff --git a/src/transformers/models/fsmt/tokenization_fsmt.py b/src/transformers/models/fsmt/tokenization_fsmt.py index 30d5a385b8..124a9541d7 100644 --- a/src/transformers/models/fsmt/tokenization_fsmt.py +++ b/src/transformers/models/fsmt/tokenization_fsmt.py @@ -489,7 +489,7 @@ class FSMTTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return src_vocab_file = os.path.join( @@ -514,8 +514,8 @@ class FSMTTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not 
consecutive." - " Please check that the tokenizer is not corrupted!".format(merges_file) + f"Saving vocabulary to {merges_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" ) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py index dda913c74d..b13d6dcd10 100755 --- a/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py @@ -29,14 +29,14 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path, base_model): # Initialise PyTorch model config = FunnelConfig.from_json_file(config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = FunnelBaseModel(config) if base_model else FunnelModel(config) # Load weights from tf checkpoint load_tf_weights_in_funnel(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py index a48f7e01b5..1f277498d1 100644 --- a/src/transformers/models/funnel/modeling_funnel.py +++ b/src/transformers/models/funnel/modeling_funnel.py @@ -80,13 +80,13 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = 
tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -116,7 +116,7 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path): n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if name[0] == "generator": continue @@ -143,7 +143,7 @@ def load_tf_weights_in_funnel(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, m_name) except AttributeError: - print("Skipping {}".format("/".join(name)), array.shape) + print(f"Skipping {'/'.join(name)}", array.shape) skipped = True break if not skipped: diff --git a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py index e5f8be1891..7bc720fa88 100755 --- a/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py @@ -41,9 +41,9 @@ def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config_file, p # Save pytorch-model pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME - print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) + print(f"Save PyTorch model to {pytorch_weights_dump_path}") torch.save(model.state_dict(), pytorch_weights_dump_path) - print("Save configuration file to {}".format(pytorch_config_dump_path)) + print(f"Save configuration file to {pytorch_config_dump_path}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: 
f.write(config.to_json_string()) diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 4518964052..bcfb8af80b 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -78,13 +78,13 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path): ) raise tf_path = os.path.abspath(gpt2_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array.squeeze()) @@ -117,7 +117,7 @@ def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index c2ebb2ebd7..cc7829871a 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -233,7 +233,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer): config.vocab_size, config.hidden_size, initializer_range=config.initializer_range, name="wte" ) self.drop = tf.keras.layers.Dropout(config.embd_pdrop) - self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i)) for i in range(config.n_layer)] + self.h = [TFBlock(config.n_ctx, config, scale=True, name=f"h_._{i}") for i in range(config.n_layer)] self.ln_f = 
tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_epsilon, name="ln_f") def build(self, input_shape): diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index 4601f902e0..e27ad9d3c0 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -267,7 +267,7 @@ class GPT2Tokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -285,8 +285,8 @@ class GPT2Tokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" 
) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py index 8378ad5369..1c630fb2d8 100644 --- a/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py +++ b/src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py @@ -38,14 +38,14 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_du embed_dropout=config_json["embed_dropout"], attention_dropout=config_json["attn_dropout"], ) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = GPTNeoForCausalLM(config) # Load weights from tf checkpoint load_tf_weights_in_gpt_neo(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") model.save_pretrained(pytorch_dump_path) diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index 7abaa9c7aa..9fb0d7475f 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -63,7 +63,7 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path): ) raise tf_path = os.path.abspath(gpt_neo_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] @@ -119,7 +119,7 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - print("Initialize PyTorch weight {}".format(name)) + print(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) # 
init the final linear layer using word embeddings @@ -431,9 +431,8 @@ class GPTNeoAttention(nn.Module): self.attention = GPTNeoLocalSelfAttention(config) else: raise NotImplementedError( - "Only attn layer types 'global' and 'local' exist, but got `config.attention_layers`: {}. Select attn layer types from ['global', 'local'] only.".format( - self.attention_layers - ) + "Only attn layer types 'global' and 'local' exist, but got `config.attention_layers`: " + f"{config.attention_layers}. Select attn layer types from ['global', 'local'] only." ) def forward( diff --git a/src/transformers/models/ibert/modeling_ibert.py b/src/transformers/models/ibert/modeling_ibert.py index abb53305f8..382577a9f0 100644 --- a/src/transformers/models/ibert/modeling_ibert.py +++ b/src/transformers/models/ibert/modeling_ibert.py @@ -179,8 +179,8 @@ class IBertSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.quant_mode = config.quant_mode self.weight_bit = 8 diff --git a/src/transformers/models/ibert/quant_modules.py b/src/transformers/models/ibert/quant_modules.py index 57b054d8b0..add4410ca6 100644 --- a/src/transformers/models/ibert/quant_modules.py +++ b/src/transformers/models/ibert/quant_modules.py @@ -151,11 +151,9 @@ class QuantAct(nn.Module): def __repr__(self): return ( - "{0}(activation_bit={1}, " - "quant_mode: {2}, Act_min: {3:.2f}, " - "Act_max: {4:.2f})".format( - self.__class__.__name__, self.activation_bit, self.quant_mode, self.x_min.item(), self.x_max.item() - ) + f"{self.__class__.__name__}(activation_bit={self.activation_bit}, " + f"quant_mode: {self.quant_mode}, Act_min: 
{self.x_min.item():.2f}, " + f"Act_max: {self.x_max.item():.2f})" ) def forward( @@ -261,7 +259,7 @@ class QuantLinear(nn.Module): def __repr__(self): s = super().__repr__() - s = "(" + s + " weight_bit={}, quant_mode={})".format(self.weight_bit, self.quant_mode) + s = f"({s} weight_bit={self.weight_bit}, quant_mode={self.quant_mode})" return s def forward(self, x, prev_act_scaling_factor=None): @@ -471,7 +469,7 @@ class IntLayerNorm(nn.Module): shift = (torch.log2(torch.sqrt(var_int / 2 ** self.max_bit)).ceil()).max() shift_old = self.shift self.shift = torch.max(self.shift, shift) - logger.info("Dynamic shift adjustment: {} -> {}".format(int(shift_old), int(self.shift))) + logger.info(f"Dynamic shift adjustment: {int(shift_old)} -> {int(self.shift)}") def overflow_fallback(self, y_int): """ diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py index 8d6d0a7d15..3211d6a0f2 100644 --- a/src/transformers/models/layoutlm/modeling_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_layoutlm.py @@ -135,8 +135,8 @@ class LayoutLMSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index c3be217c6c..d17924f9f4 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -402,7 +402,7 @@ class TFLayoutLMEncoder(tf.keras.layers.Layer): def __init__(self, config: 
LayoutLMConfig, **kwargs): super().__init__(**kwargs) - self.layer = [TFLayoutLMLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFLayoutLMLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index c61a76c58a..38da6e3bdc 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -131,8 +131,8 @@ class LEDEncoderSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_heads = config.num_attention_heads self.head_dim = int(config.hidden_size / config.num_attention_heads) @@ -1673,9 +1673,8 @@ class LEDEncoder(LEDPreTrainedModel): padding_len = (attention_window - seq_len % attention_window) % attention_window if padding_len > 0: logger.info( - "Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format( - seq_len, seq_len + padding_len, attention_window - ) + f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of " + f"`config.attention_window`: {attention_window}" ) if input_ids is not None: input_ids = F.pad(input_ids, (0, padding_len), value=pad_token_id) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 3e7c49c9d7..8197a8ad80 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -127,8 +127,8 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer): if 
config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_heads = config.num_attention_heads @@ -1824,9 +1824,8 @@ class TFLEDEncoder(tf.keras.layers.Layer): if padding_len > 0: logger.info( - "Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format( - seq_len, seq_len + padding_len, attention_window - ) + f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of " + f"`config.attention_window`: {attention_window}" ) paddings = tf.convert_to_tensor([[0, 0], [0, padding_len]]) diff --git a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py index 6c310a5faf..40b2f864c8 100644 --- a/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py +++ b/src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py @@ -57,7 +57,7 @@ def convert_longformer_qa_checkpoint_to_pytorch( # save model longformer_for_qa.save_pretrained(pytorch_dump_folder_path) - print("Conversion successful. Model saved under {}".format(pytorch_dump_folder_path)) + print(f"Conversion successful. 
Model saved under {pytorch_dump_folder_path}") if __name__ == "__main__": diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index c5b29e29a3..65634ca314 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -521,8 +521,8 @@ class LongformerSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_heads = config.num_attention_heads self.head_dim = int(config.hidden_size / config.num_attention_heads) @@ -1542,9 +1542,8 @@ class LongformerModel(LongformerPreTrainedModel): padding_len = (attention_window - seq_len % attention_window) % attention_window if padding_len > 0: logger.info( - "Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format( - seq_len, seq_len + padding_len, attention_window - ) + f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of " + f"`config.attention_window`: {attention_window}" ) if input_ids is not None: input_ids = F.pad(input_ids, (0, padding_len), value=pad_token_id) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 67bd7cd741..6d5f769283 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -646,8 +646,8 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer): if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not 
a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_heads = config.num_attention_heads @@ -1518,9 +1518,7 @@ class TFLongformerEncoder(tf.keras.layers.Layer): self.output_hidden_states = config.output_hidden_states self.output_attentions = config.output_attentions - self.layer = [ - TFLongformerLayer(config, i, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers) - ] + self.layer = [TFLongformerLayer(config, i, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, @@ -1780,9 +1778,8 @@ class TFLongformerMainLayer(tf.keras.layers.Layer): if padding_len > 0: logger.info( - "Input ids are automatically padded from {} to {} to be a multiple of `config.attention_window`: {}".format( - seq_len, seq_len + padding_len, attention_window - ) + f"Input ids are automatically padded from {seq_len} to {seq_len + padding_len} to be a multiple of " + f"`config.attention_window`: {attention_window}" ) paddings = tf.convert_to_tensor([[0, 0], [0, padding_len]]) diff --git a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py index 3b81362b21..7debd71af3 100755 --- a/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py @@ -29,14 +29,14 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): # Initialise PyTorch model config = LxmertConfig.from_json_file(config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = LxmertForPreTraining(config) # Load weights 
from tf checkpoint load_tf_weights_in_lxmert(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index d2cf8602d1..7610d5c0c5 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -205,13 +205,13 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -231,7 +231,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path): ] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue pointer = model for m_name in name: @@ -251,7 +251,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -265,7 +265,7 @@ def load_tf_weights_in_lxmert(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") 
pointer.data = torch.from_numpy(array) return model @@ -315,8 +315,8 @@ class LxmertAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads self.attention_head_size = int(config.hidden_size / config.num_attention_heads) diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index e20ddc8f3c..70def7e77b 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -249,8 +249,8 @@ class TFLxmertAttention(tf.keras.layers.Layer): super().__init__(**kwargs) if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads @@ -547,9 +547,9 @@ class TFLxmertEncoder(tf.keras.layers.Layer): # Layers # Using self.layer instead of self.l_layer to support loading BERT weights. 
- self.layer = [TFLxmertLayer(config, name="layer_._{}".format(i)) for i in range(self.num_l_layers)] - self.x_layers = [TFLxmertXLayer(config, name="x_layers_._{}".format(i)) for i in range(self.num_x_layers)] - self.r_layers = [TFLxmertLayer(config, name="r_layers_._{}".format(i)) for i in range(self.num_r_layers)] + self.layer = [TFLxmertLayer(config, name=f"layer_._{i}") for i in range(self.num_l_layers)] + self.x_layers = [TFLxmertXLayer(config, name=f"x_layers_._{i}") for i in range(self.num_x_layers)] + self.r_layers = [TFLxmertLayer(config, name=f"r_layers_._{i}") for i in range(self.num_r_layers)] self.config = config def call( diff --git a/src/transformers/models/mbart/tokenization_mbart50.py b/src/transformers/models/mbart/tokenization_mbart50.py index be94eaa80a..f5f1a2f60f 100644 --- a/src/transformers/models/mbart/tokenization_mbart50.py +++ b/src/transformers/models/mbart/tokenization_mbart50.py @@ -210,7 +210,7 @@ class MBart50Tokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/mbart/tokenization_mbart50_fast.py b/src/transformers/models/mbart/tokenization_mbart50_fast.py index 0308991de6..bda4b7cf36 100644 --- a/src/transformers/models/mbart/tokenization_mbart50_fast.py +++ b/src/transformers/models/mbart/tokenization_mbart50_fast.py @@ -275,7 +275,7 @@ class MBart50TokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a 
directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py index ce5396a932..5c03331eb3 100644 --- a/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py @@ -26,12 +26,12 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, mobilebert_config_file, pytorch_dump_path): # Initialise PyTorch model config = MobileBertConfig.from_json_file(mobilebert_config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = MobileBertForPreTraining(config) # Load weights from tf checkpoint model = load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index d300e096b7..bd3f86d21e 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -77,13 +77,13 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF 
model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -100,7 +100,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path): n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue pointer = model for m_name in name: @@ -120,7 +120,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -136,7 +136,7 @@ def load_tf_weights_in_mobilebert(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 372549862e..0a103b54f6 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -210,8 +210,8 @@ class TFMobileBertSelfAttention(tf.keras.layers.Layer): super().__init__(**kwargs) if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of 
the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads @@ -463,9 +463,7 @@ class TFMobileBertLayer(tf.keras.layers.Layer): if self.use_bottleneck: self.bottleneck = TFBottleneck(config, name="bottleneck") if config.num_feedforward_networks > 1: - self.ffn = [ - TFFFNLayer(config, name="ffn.{}".format(i)) for i in range(config.num_feedforward_networks - 1) - ] + self.ffn = [TFFFNLayer(config, name=f"ffn.{i}") for i in range(config.num_feedforward_networks - 1)] def call(self, hidden_states, attention_mask, head_mask, output_attentions, training=False): if self.use_bottleneck: @@ -518,7 +516,7 @@ class TFMobileBertEncoder(tf.keras.layers.Layer): super().__init__(**kwargs) self.output_attentions = config.output_attentions self.output_hidden_states = config.output_hidden_states - self.layer = [TFMobileBertLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFMobileBertLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/mpnet/modeling_mpnet.py b/src/transformers/models/mpnet/modeling_mpnet.py index 8b9867caeb..e64d4de30b 100644 --- a/src/transformers/models/mpnet/modeling_mpnet.py +++ b/src/transformers/models/mpnet/modeling_mpnet.py @@ -134,8 +134,8 @@ class MPNetSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 
dd02f2aa41..b9362bd625 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -192,8 +192,8 @@ class TFMPNetSelfAttention(tf.keras.layers.Layer): if config.hidden_size % config.num_attention_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads @@ -352,7 +352,7 @@ class TFMPNetEncoder(tf.keras.layers.Layer): self.relative_attention_num_buckets = config.relative_attention_num_buckets self.initializer_range = config.initializer_range - self.layer = [TFMPNetLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFMPNetLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] self.relative_attention_num_buckets = config.relative_attention_num_buckets def build(self, input_shape): diff --git a/src/transformers/models/mpnet/tokenization_mpnet.py b/src/transformers/models/mpnet/tokenization_mpnet.py index b707e41931..125fde68a5 100644 --- a/src/transformers/models/mpnet/tokenization_mpnet.py +++ b/src/transformers/models/mpnet/tokenization_mpnet.py @@ -169,8 +169,8 @@ class MPNetTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -312,8 +312,8 @@ class MPNetTokenizer(PreTrainedTokenizer): for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: vocabulary indices are not consecutive." - " Please check that the vocabulary is not corrupted!".format(vocab_file) + f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!" ) index = token_index writer.write(token + "\n") diff --git a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py index bb8aaa2282..c7576c4009 100755 --- a/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py @@ -41,9 +41,9 @@ def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, openai_c # Save pytorch-model pytorch_weights_dump_path = pytorch_dump_folder_path + "/" + WEIGHTS_NAME pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME - print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) + print(f"Save PyTorch model to {pytorch_weights_dump_path}") torch.save(model.state_dict(), pytorch_weights_dump_path) - print("Save configuration file to {}".format(pytorch_config_dump_path)) + print(f"Save configuration file to {pytorch_config_dump_path}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(config.to_json_string()) diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index 
0864a8b328..6564a8fa42 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -67,14 +67,14 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path): if ".ckpt" in openai_checkpoint_folder_path: openai_checkpoint_folder_path = os.path.dirname(openai_checkpoint_folder_path) - logger.info("Loading weights from {}".format(openai_checkpoint_folder_path)) + logger.info(f"Loading weights from {openai_checkpoint_folder_path}") with open(openai_checkpoint_folder_path + "/parameters_names.json", "r", encoding="utf-8") as names_handle: names = json.load(names_handle) with open(openai_checkpoint_folder_path + "/params_shapes.json", "r", encoding="utf-8") as shapes_handle: shapes = json.load(shapes_handle) offsets = np.cumsum([np.prod(shape) for shape in shapes]) - init_params = [np.load(openai_checkpoint_folder_path + "/params_{}.npy".format(n)) for n in range(10)] + init_params = [np.load(openai_checkpoint_folder_path + f"/params_{n}.npy") for n in range(10)] init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1] init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)] @@ -134,7 +134,7 @@ def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 0c2c7e2a66..36679c9643 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -210,7 +210,7 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer): config.vocab_size, config.n_embd, initializer_range=config.initializer_range, name="tokens_embed" ) self.drop 
= tf.keras.layers.Dropout(config.embd_pdrop) - self.h = [TFBlock(config.n_ctx, config, scale=True, name="h_._{}".format(i)) for i in range(config.n_layer)] + self.h = [TFBlock(config.n_ctx, config, scale=True, name=f"h_._{i}") for i in range(config.n_layer)] def build(self, input_shape): with tf.name_scope("positions_embed"): diff --git a/src/transformers/models/openai/tokenization_openai.py b/src/transformers/models/openai/tokenization_openai.py index 8a0e58f020..92d4286c60 100644 --- a/src/transformers/models/openai/tokenization_openai.py +++ b/src/transformers/models/openai/tokenization_openai.py @@ -205,7 +205,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -223,8 +223,8 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" 
) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/pegasus/tokenization_pegasus.py b/src/transformers/models/pegasus/tokenization_pegasus.py index 68ad5b83ad..472ca424bb 100644 --- a/src/transformers/models/pegasus/tokenization_pegasus.py +++ b/src/transformers/models/pegasus/tokenization_pegasus.py @@ -250,7 +250,7 @@ class PegasusTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/pegasus/tokenization_pegasus_fast.py b/src/transformers/models/pegasus/tokenization_pegasus_fast.py index 124bdafbae..08bd471933 100644 --- a/src/transformers/models/pegasus/tokenization_pegasus_fast.py +++ b/src/transformers/models/pegasus/tokenization_pegasus_fast.py @@ -191,7 +191,7 @@ class PegasusTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/phobert/tokenization_phobert.py b/src/transformers/models/phobert/tokenization_phobert.py index 684f2b3f39..e99e58002e 100644 --- a/src/transformers/models/phobert/tokenization_phobert.py +++ b/src/transformers/models/phobert/tokenization_phobert.py @@ -312,7 +312,7 @@ class 
PhobertTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -346,7 +346,7 @@ class PhobertTokenizer(PreTrainedTokenizer): except FileNotFoundError as fnfe: raise fnfe except UnicodeError: - raise Exception("Incorrect encoding detected in {}, please " "rebuild the dataset".format(f)) + raise Exception(f"Incorrect encoding detected in {f}, please rebuild the dataset") return lines = f.readlines() diff --git a/src/transformers/models/prophetnet/tokenization_prophetnet.py b/src/transformers/models/prophetnet/tokenization_prophetnet.py index 213e303a88..cd51662b55 100644 --- a/src/transformers/models/prophetnet/tokenization_prophetnet.py +++ b/src/transformers/models/prophetnet/tokenization_prophetnet.py @@ -135,8 +135,8 @@ class ProphetNetTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = ProphetNetTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = ProphetNetTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -255,8 +255,8 @@ class ProphetNetTokenizer(PreTrainedTokenizer): for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: vocabulary indices are not consecutive." - " Please check that the vocabulary is not corrupted!".format(vocab_file) + f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!" ) index = token_index writer.write(token + "\n") diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index ae735926b2..7975361749 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -494,9 +494,7 @@ class RagModel(RagPreTrainedModel): question_encoder.config, generator.config, **kwargs ) else: - assert isinstance(config, self.config_class), "config: {} has to be of type {}".format( - config, self.config_class - ) + assert isinstance(config, self.config_class), f"config: {config} has to be of type {self.config_class}" super().__init__(config) if question_encoder is None: from ..auto.modeling_auto import AutoModel diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 84e0f50c3e..4e80f8fd08 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -496,9 +496,7 @@ class TFRagModel(TFRagPreTrainedModel): question_encoder.config, generator.config, **kwargs ) else: - assert isinstance(config, self.config_class), "config: {} has to be of type {}".format( - config, self.config_class - ) + assert isinstance(config, 
self.config_class), f"config: {config} has to be of type {self.config_class}" super().__init__(config, **kwargs) if question_encoder is None: diff --git a/src/transformers/models/rag/retrieval_rag.py b/src/transformers/models/rag/retrieval_rag.py index 12ad21ac43..dd1ddc03d7 100644 --- a/src/transformers/models/rag/retrieval_rag.py +++ b/src/transformers/models/rag/retrieval_rag.py @@ -133,20 +133,20 @@ class LegacyIndex(Index): ) raise EnvironmentError(msg) if resolved_archive_file == archive_file: - logger.info("loading file {}".format(archive_file)) + logger.info(f"loading file {archive_file}") else: - logger.info("loading file {} from cache at {}".format(archive_file, resolved_archive_file)) + logger.info(f"loading file {archive_file} from cache at {resolved_archive_file}") return resolved_archive_file def _load_passages(self): - logger.info("Loading passages from {}".format(self.index_path)) + logger.info(f"Loading passages from {self.index_path}") passages_path = self._resolve_path(self.index_path, self.PASSAGE_FILENAME) with open(passages_path, "rb") as passages_file: passages = pickle.load(passages_file) return passages def _deserialize_index(self): - logger.info("Loading index from {}".format(self.index_path)) + logger.info(f"Loading index from {self.index_path}") resolved_index_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index.dpr") self.index = faiss.read_index(resolved_index_path) resolved_meta_path = self._resolve_path(self.index_path, self.INDEX_FILENAME + ".index_meta.dpr") @@ -200,12 +200,12 @@ class HFIndexBase(Index): def _check_dataset_format(self, with_index: bool): if not isinstance(self.dataset, Dataset): - raise ValueError("Dataset should be a datasets.Dataset object, but got {}".format(type(self.dataset))) + raise ValueError(f"Dataset should be a datasets.Dataset object, but got {type(self.dataset)}") if len({"title", "text", "embeddings"} - set(self.dataset.column_names)) > 0: raise ValueError( "Dataset should be a 
dataset with the following columns: " "title (str), text (str) and embeddings (arrays of dimension vector_size), " - "but got columns {}".format(self.dataset.column_names) + f"but got columns {self.dataset.column_names}" ) if with_index and "embeddings" not in self.dataset.list_indexes(): raise ValueError( @@ -269,7 +269,7 @@ class CanonicalHFIndex(HFIndexBase): self.index_name = index_name self.index_path = index_path self.use_dummy_dataset = use_dummy_dataset - logger.info("Loading passages from {}".format(self.dataset_name)) + logger.info(f"Loading passages from {self.dataset_name}") dataset = load_dataset( self.dataset_name, with_index=False, split=self.dataset_split, dummy=self.use_dummy_dataset ) @@ -277,10 +277,10 @@ class CanonicalHFIndex(HFIndexBase): def init_index(self): if self.index_path is not None: - logger.info("Loading index from {}".format(self.index_path)) + logger.info(f"Loading index from {self.index_path}") self.dataset.load_faiss_index("embeddings", file=self.index_path) else: - logger.info("Loading index from {}".format(self.dataset_name + " with index name " + self.index_name)) + logger.info(f"Loading index from {self.dataset_name} with index name {self.index_name}") self.dataset = load_dataset( self.dataset_name, with_embeddings=True, @@ -313,7 +313,7 @@ class CustomHFIndex(HFIndexBase): @classmethod def load_from_disk(cls, vector_size, dataset_path, index_path): - logger.info("Loading passages from {}".format(dataset_path)) + logger.info(f"Loading passages from {dataset_path}") if dataset_path is None or index_path is None: raise ValueError( "Please provide ``dataset_path`` and ``index_path`` after calling ``dataset.save_to_disk(dataset_path)`` " @@ -324,7 +324,7 @@ class CustomHFIndex(HFIndexBase): def init_index(self): if not self.is_initialized(): - logger.info("Loading index from {}".format(self.index_path)) + logger.info(f"Loading index from {self.index_path}") self.dataset.load_faiss_index("embeddings", file=self.index_path) 
self._index_initialized = True @@ -520,9 +520,7 @@ class RagRetriever: start_time = time.time() ids, vectors = self.index.get_top_docs(question_hidden_states, n_docs) logger.debug( - "index search time: {} sec, batch size {}".format( - time.time() - start_time, question_hidden_states.shape - ) + f"index search time: {time.time() - start_time} sec, batch size {question_hidden_states.shape}" ) ids_batched.extend(ids) vectors_batched.extend(vectors) diff --git a/src/transformers/models/rag/tokenization_rag.py b/src/transformers/models/rag/tokenization_rag.py index d78a087bc7..d92ca1788f 100644 --- a/src/transformers/models/rag/tokenization_rag.py +++ b/src/transformers/models/rag/tokenization_rag.py @@ -34,7 +34,7 @@ class RagTokenizer: def save_pretrained(self, save_directory): if os.path.isfile(save_directory): - raise ValueError("Provided path ({}) should be a directory, not a file".format(save_directory)) + raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file") os.makedirs(save_directory, exist_ok=True) question_encoder_path = os.path.join(save_directory, "question_encoder_tokenizer") generator_path = os.path.join(save_directory, "generator_tokenizer") diff --git a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py index ec58e2f913..32902fa8e7 100755 --- a/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py +++ b/src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py @@ -30,10 +30,10 @@ logging.set_verbosity_info() def set_param(torch_layer, weight, bias=None): # set parameter of one layer - assert torch_layer.weight.shape == weight.shape, "{} layer.weight does not match".format(torch_layer) + assert torch_layer.weight.shape == weight.shape, f"{torch_layer} layer.weight does not match" torch_layer.weight = torch.nn.Parameter(weight) if bias is not None: - assert 
torch_layer.bias.shape == bias.shape, "{} layer.bias does not match".format(torch_layer) + assert torch_layer.bias.shape == bias.shape, f"{torch_layer} layer.bias does not match" torch_layer.bias = torch.nn.Parameter(bias) @@ -150,9 +150,9 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size): position_embeddings = torch_model_reformer.embeddings.position_embeddings for emb_idx in range(len(position_embeddings.weights)): emb_weights = np.asarray(weights[3][emb_idx][0]) - assert position_embeddings.weights[emb_idx].shape == emb_weights.shape, "{} emb does not match".format( - position_embeddings[emb_idx] - ) + assert ( + position_embeddings.weights[emb_idx].shape == emb_weights.shape + ), f"{position_embeddings[emb_idx]} emb does not match" position_embeddings.weights[emb_idx] = torch.nn.Parameter(torch.tensor(emb_weights)) trax_layer_weights = weights[5] @@ -185,7 +185,7 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size): def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch_dump_path): # Initialise PyTorch model config = ReformerConfig.from_json_file(config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = ReformerModelWithLMHead(config) with open(trax_model_pkl_path, "rb") as f: @@ -194,7 +194,7 @@ def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file, pytorch set_model_weights_in_torch(model_weights, model, config.hidden_size) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") torch.save(model.state_dict(), pytorch_dump_path) diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 0ff34454ae..516fff8f91 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ 
b/src/transformers/models/reformer/modeling_reformer.py @@ -90,9 +90,8 @@ def _get_least_common_mult_chunk_len(config): return np.lcm(config.lsh_attn_chunk_length, config.local_attn_chunk_length) else: raise NotImplementedError( - "Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format( - config.attn_layers - ) + f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select " + "attn layer types from ['lsh', 'local'] only." ) @@ -107,9 +106,8 @@ def _get_min_chunk_len(config): return min(config.lsh_attn_chunk_length, config.local_attn_chunk_length) else: raise NotImplementedError( - "Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format( - config.attn_layers - ) + f"Only attn layer types 'lsh' and 'local' exist, but `config.attn_layers`: {config.attn_layers}. Select " + "attn layer types from ['lsh', 'local'] only." ) @@ -127,11 +125,11 @@ class AxialPositionEmbeddings(nn.Module): self.least_common_mult_chunk_length = _get_least_common_mult_chunk_len(config) self.weights = nn.ParameterList() - assert ( - sum(self.axial_pos_embds_dim) == config.hidden_size - ), "Make sure that config.axial_pos_embds factors: {} sum to config.hidden_size: {}".format( - self.axial_pos_embds_dim, config.hidden_size - ) + if sum(self.axial_pos_embds_dim) != config.hidden_size: + raise ValueError( + f"Make sure that config.axial_pos_embds factors: {self.axial_pos_embds_dim} sum to " + f"config.hidden_size: {config.hidden_size}" + ) # create weights for axis, axial_pos_embd_dim in enumerate(self.axial_pos_embds_dim): @@ -153,11 +151,14 @@ class AxialPositionEmbeddings(nn.Module): ] if self.training is True: - assert ( - reduce(mul, self.axial_pos_shape) == sequence_length - ), "If training, make sure that config.axial_pos_shape factors: {} multiply to sequence length. 
Got prod({}) != sequence_length: {}. You might want to consider padding your sequence length to {} or changing config.axial_pos_shape.".format( - self.axial_pos_shape, self.axial_pos_shape, sequence_length, reduce(mul, self.axial_pos_shape) - ) + if reduce(mul, self.axial_pos_shape) != sequence_length: + raise ValueError( + f"If training, make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply to " + f"sequence length. Got prod({self.axial_pos_shape}) != sequence_length: {sequence_length}. " + f"You might want to consider padding your sequence length to {reduce(mul, self.axial_pos_shape)} " + "or changing config.axial_pos_shape." + ) + if self.dropout > 0: weights = torch.cat(broadcasted_weights, dim=-1) # permute weights so that 2D correctly drops dims 1 and 2 @@ -177,13 +178,12 @@ class AxialPositionEmbeddings(nn.Module): ) else: - assert ( - reduce(mul, self.axial_pos_shape) >= sequence_length - ), "Make sure that config.axial_pos_shape factors: {} multiply at least to max(sequence_length, least_common_mult_chunk_length): max({}, {})".format( - self.axial_pos_shape, - sequence_length, - self.least_common_mult_chunk_length, - ) + if reduce(mul, self.axial_pos_shape) < sequence_length: + raise ValueError( + f"Make sure that config.axial_pos_shape factors: {self.axial_pos_shape} multiply at least to " + f"max(sequence_length, least_common_mult_chunk_length): max({sequence_length}, " + f"{self.least_common_mult_chunk_length})." 
+ ) # compute how many columns are needed max_position_id = position_ids.max().item() @@ -252,11 +252,11 @@ class ReformerEmbeddings(nn.Module): if inputs_embeds is None: inputs_embeds = self.word_embeddings(input_ids) - assert ( - position_ids.shape[-1] <= self.max_position_embeddings - ), "Sequence Length: {} has to be larger equal than config.max_position_embeddings: {}".format( - position_ids.shape[-1], self.max_position_embeddings - ) + if position_ids.shape[-1] > self.max_position_embeddings: + raise ValueError( + f"Sequence Length: {position_ids.shape[-1]} has to be larger equal than " + f"config.max_position_embeddings {self.max_position_embeddings}." + ) # dropout embeddings = nn.functional.dropout(inputs_embeds, p=self.dropout, training=self.training) @@ -322,7 +322,7 @@ class EfficientAttentionMixin: elif len(vectors.shape) == 3: return torch.reshape(vectors, split_dim_shape) else: - raise ValueError("Input vector rank should be one of [3, 4], but is: {}".format(len(vectors.shape))) + raise ValueError(f"Input vector rank should be one of [3, 4], but is: {len(vectors.shape)}") class LSHSelfAttention(nn.Module, EfficientAttentionMixin): @@ -451,14 +451,10 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): assert ( query_key_vectors.shape[-1] == self.attention_head_size - ), "last dim of query_key_vectors is {} but should be {}.".format( - query_key_vectors.shape[-1], self.attention_head_size - ) + ), f"last dim of query_key_vectors is {query_key_vectors.shape[-1]} but should be {self.attention_head_size}." assert ( value_vectors.shape[-1] == self.attention_head_size - ), "last dim of value_vectors is {} but should be {}.".format( - value_vectors.shape[-1], self.attention_head_size - ) + ), f"last dim of value_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." 
do_standard_self_attention = (sequence_length <= self.chunk_length) or ( use_cache and past_buckets_states[1] is not None @@ -479,7 +475,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): assert ( int(buckets.shape[-1]) == num_hashes * sequence_length - ), "last dim of buckets is {}, but should be {}".format(buckets.shape[-1], num_hashes * sequence_length) + ), f"last dim of buckets is {buckets.shape[-1]}, but should be {num_hashes * sequence_length}" sorted_bucket_idx, undo_sorted_bucket_idx = self._get_sorted_bucket_idx_and_undo_sorted_bucket_idx( sequence_length, buckets, num_hashes @@ -616,16 +612,16 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): if isinstance(self.num_buckets, int): assert ( self.num_buckets % 2 == 0 - ), "There should be an even number of bucktes, but `self.num_bucktes`: {}".format(self.num_buckets) + ), f"There should be an even number of bucktes, but `self.num_bucktes`: {self.num_buckets}" rotation_size = self.num_buckets num_buckets = self.num_buckets else: # Factorize the hash if self.num_buckets is a list or tuple rotation_size, num_buckets = 0, 1 for bucket_factor in self.num_buckets: - assert bucket_factor % 2 == 0, "The number of buckets should be even, but `num_bucket`: {}".format( - bucket_factor - ) + assert ( + bucket_factor % 2 == 0 + ), f"The number of buckets should be even, but `num_bucket`: {bucket_factor}" rotation_size = rotation_size + bucket_factor num_buckets = num_buckets * bucket_factor @@ -714,7 +710,7 @@ class LSHSelfAttention(nn.Module, EfficientAttentionMixin): if num_buckets > num_buckets_limit: num_buckets = [2 ** (num_buckets_pow_2 // 2), 2 ** (num_buckets_pow_2 - num_buckets_pow_2 // 2)] - logger.warning("config.num_buckets is not set. Setting config.num_buckets to {}...".format(num_buckets)) + logger.warning(f"config.num_buckets is not set. 
Setting config.num_buckets to {num_buckets}...") # set num buckets in config to be properly saved self.config.num_buckets = num_buckets @@ -1085,19 +1081,13 @@ class LocalSelfAttention(nn.Module, EfficientAttentionMixin): assert ( query_vectors.shape[-1] == self.attention_head_size - ), "last dim of query_key_vectors is {} but should be {}.".format( - query_vectors.shape[-1], self.attention_head_size - ) + ), f"last dim of query_key_vectors is {query_vectors.shape[-1]} but should be {self.attention_head_size}." assert ( key_vectors.shape[-1] == self.attention_head_size - ), "last dim of query_key_vectors is {} but should be {}.".format( - key_vectors.shape[-1], self.attention_head_size - ) + ), f"last dim of query_key_vectors is {key_vectors.shape[-1]} but should be {self.attention_head_size}." assert ( value_vectors.shape[-1] == self.attention_head_size - ), "last dim of query_key_vectors is {} but should be {}.".format( - value_vectors.shape[-1], self.attention_head_size - ) + ), f"last dim of query_key_vectors is {value_vectors.shape[-1]} but should be {self.attention_head_size}." if self.chunk_length is None: assert ( @@ -1280,9 +1270,8 @@ class ReformerAttention(nn.Module): self.self_attention = LocalSelfAttention(config) else: raise NotImplementedError( - "Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {}. Select attn layer types from ['lsh', 'local'] only.".format( - self.attn_layers - ) + f"Only attn layer types 'lsh' and 'local' exist, but got `config.attn_layers`: {self.attn_layers}. " + "Select attn layer types from ['lsh', 'local'] only." 
) self.output = ReformerSelfOutput(config) @@ -2036,7 +2025,7 @@ class ReformerModel(ReformerPreTrainedModel): assert ( len(input_shape) == 2 - ), "`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {}".format(input_shape) + ), f"`input_ids` have be of shape `[batch_size, sequence_length]`, but got shape: {input_shape}" if past_buckets_states is not None: assert not self.training, "`past_buckets_states` can only be used for inference, not for training`." @@ -2062,9 +2051,9 @@ class ReformerModel(ReformerPreTrainedModel): if self.training is True: raise ValueError( - "If training, sequence Length {} has to be a multiple of least common multiple chunk_length {}. Please consider padding the input to a length of {}.".format( - input_shape[-1], least_common_mult_chunk_length, input_shape[-1] + padding_length - ) + f"If training, sequence length {input_shape[-1]} has to be a multiple of least common multiple " + f"chunk_length {least_common_mult_chunk_length}. Please consider padding the input to a length " + f"of {input_shape[-1] + padding_length}." 
) # pad input @@ -2134,9 +2123,8 @@ class ReformerModel(ReformerPreTrainedModel): device=None, ): logger.info( - "Input ids are automatically padded from {} to {} to be a multiple of `config.chunk_length`: {}".format( - input_shape[-1], input_shape[-1] + padding_length, padded_seq_length - ) + f"Input ids are automatically padded from {input_shape[-1]} to {input_shape[-1] + padding_length} to be a " + f"multiple of `config.chunk_length`: {padded_seq_length}" ) padded_input_ids = torch.full( diff --git a/src/transformers/models/reformer/tokenization_reformer.py b/src/transformers/models/reformer/tokenization_reformer.py index f2000d69d7..c933d0cbc7 100644 --- a/src/transformers/models/reformer/tokenization_reformer.py +++ b/src/transformers/models/reformer/tokenization_reformer.py @@ -131,7 +131,7 @@ class ReformerTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/reformer/tokenization_reformer_fast.py b/src/transformers/models/reformer/tokenization_reformer_fast.py index d8050ec642..f27b861216 100644 --- a/src/transformers/models/reformer/tokenization_reformer_fast.py +++ b/src/transformers/models/reformer/tokenization_reformer_fast.py @@ -107,7 +107,7 @@ class ReformerTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return 
out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index 0e9d214926..88155f76de 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -149,8 +149,8 @@ class RobertaSelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 07922d6b2e..e0b54e52ce 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -396,7 +396,7 @@ class TFRobertaEncoder(tf.keras.layers.Layer): def __init__(self, config: RobertaConfig, **kwargs): super().__init__(**kwargs) - self.layer = [TFRobertaLayer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TFRobertaLayer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py index 455bc4881d..09dcd680bb 100644 --- a/src/transformers/models/squeezebert/modeling_squeezebert.py +++ b/src/transformers/models/squeezebert/modeling_squeezebert.py @@ -172,8 +172,7 @@ class SqueezeBertSelfAttention(nn.Module): super().__init__() if cin % 
config.num_attention_heads != 0: raise ValueError( - "cin (%d) is not a multiple of the number of attention " - "heads (%d)" % (cin, config.num_attention_heads) + f"cin ({cin}) is not a multiple of the number of attention heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads self.attention_head_size = int(cin / config.num_attention_heads) diff --git a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py index e38680df84..a002030168 100755 --- a/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py @@ -27,14 +27,14 @@ logging.set_verbosity_info() def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path): # Initialise PyTorch model config = T5Config.from_json_file(config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = T5ForConditionalGeneration(config) # Load weights from tf checkpoint load_tf_weights_in_t5(model, config, tf_checkpoint_path) # Save pytorch-model - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") model.save_pretrained(pytorch_dump_path) diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 216dd03ce7..2c8463d44e 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -82,13 +82,13 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = 
[] tf_weights = {} for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) tf_weights[name] = array @@ -101,11 +101,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") tf_weights.pop(txt_name, None) continue if "_slot_" in name[-1]: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") tf_weights.pop(txt_name, None) continue pointer = model @@ -149,7 +149,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -157,7 +157,7 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): if scope_names[0] not in ["kernel", "scale", "embedding"]: pointer = getattr(pointer, "weight") if scope_names[0] != "embedding": - logger.info("Transposing numpy weight of shape {} for {}".format(array.shape, name)) + logger.info(f"Transposing numpy weight of shape {array.shape} for {name}") array = np.transpose(array) try: assert ( @@ -166,11 +166,11 @@ def load_tf_weights_in_t5(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array.astype(np.float32)) tf_weights.pop(txt_name, None) - logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys()))) + logger.info(f"Weights not copied 
to PyTorch model: {', '.join(tf_weights.keys())}.") return model @@ -428,9 +428,7 @@ class T5Attention(nn.Module): if past_key_value is not None: assert ( len(past_key_value) == 2 - ), "past_key_value should have 2 past states: keys and values. Got {} past states".format( - len(past_key_value) - ) + ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length key_length = real_seq_length if key_value_states is None else key_value_states.shape[1] @@ -618,12 +616,12 @@ class T5Block(nn.Module): assert self.is_decoder, "Only decoder can use `past_key_values`" expected_num_past_key_values = 2 if encoder_hidden_states is None else 4 - error_message = "There should be {} past states. 2 (past / key) for self attention.{} Got {} past key / value states".format( - expected_num_past_key_values, - "2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "", - len(past_key_value), - ) - assert len(past_key_value) == expected_num_past_key_values, error_message + if len(past_key_value) != expected_num_past_key_values: + raise ValueError( + f"There should be {expected_num_past_key_values} past states. " + f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}." 
+ f"Got {len(past_key_value)} past key / value states" + ) self_attn_past_key_value = past_key_value[:2] cross_attn_past_key_value = past_key_value[2:] @@ -888,9 +886,7 @@ class T5Stack(T5PreTrainedModel): mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length if use_cache is True: - assert self.is_decoder, ":obj:`use_cache` can only be set to `True` if {} is used as a decoder".format( - self - ) + assert self.is_decoder, f":obj:`use_cache` can only be set to `True` if {self} is used as a decoder" if attention_mask is None: attention_mask = torch.ones(batch_size, mask_seq_length).to(inputs_embeds.device) diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 9f5fa0737e..d964815a6f 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -273,9 +273,7 @@ class TFT5Attention(tf.keras.layers.Layer): if past_key_value is not None: assert ( len(past_key_value) == 2 - ), "past_key_value should have 2 past states: keys and values. Got {} past states".format( - len(past_key_value) - ) + ), f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" real_seq_length += shape_list(past_key_value[0])[2] if query_length is None else query_length key_length = real_seq_length if key_value_states is None else shape_list(key_value_states)[1] @@ -472,7 +470,7 @@ class TFT5Block(tf.keras.layers.Layer): ) ) - self.layer.append(TFT5LayerFF(config, name="layer_._{}".format(len(self.layer)))) + self.layer.append(TFT5LayerFF(config, name=f"layer_._{len(self.layer)}")) def call( self, @@ -494,12 +492,12 @@ class TFT5Block(tf.keras.layers.Layer): assert self.is_decoder, "Only decoder can use `past_key_values`" expected_num_past_key_values = 2 if encoder_hidden_states is None else 4 - error_message = "There should be {} past states. 
2 (past / key) for self attention.{} Got {} past key / value states".format( - expected_num_past_key_values, - "2 (past / key) for cross attention" if expected_num_past_key_values == 4 else "", - len(past_key_value), - ) - assert len(past_key_value) == expected_num_past_key_values, error_message + if len(past_key_value) != expected_num_past_key_values: + raise ValueError( + f"There should be {expected_num_past_key_values} past states. " + f"{'2 (past / key) for cross attention' if expected_num_past_key_values == 4 else ''}." + f"Got {len(past_key_value)} past key / value states" + ) self_attn_past_key_value = past_key_value[:2] cross_attn_past_key_value = past_key_value[2:] @@ -579,11 +577,7 @@ class TFT5MainLayer(tf.keras.layers.Layer): self.num_hidden_layers = config.num_layers self.block = [ - TFT5Block( - config, - has_relative_attention_bias=bool(i == 0), - name="block_._{}".format(i), - ) + TFT5Block(config, has_relative_attention_bias=bool(i == 0), name=f"block_._{i}") for i in range(config.num_layers) ] self.final_layer_norm = TFT5LayerNorm(epsilon=config.layer_norm_epsilon, name="final_layer_norm") diff --git a/src/transformers/models/t5/tokenization_t5.py b/src/transformers/models/t5/tokenization_t5.py index 07c2fdf47b..74dc811c6e 100644 --- a/src/transformers/models/t5/tokenization_t5.py +++ b/src/transformers/models/t5/tokenization_t5.py @@ -104,7 +104,7 @@ class T5Tokenizer(PreTrainedTokenizer): ): # Add extra_ids to the special token list if extra_ids > 0 and additional_special_tokens is None: - additional_special_tokens = ["".format(i) for i in range(extra_ids)] + additional_special_tokens = [f"" for i in range(extra_ids)] elif extra_ids > 0 and additional_special_tokens is not None: # Check that we have the right number of extra_id special tokens extra_tokens = len(set(filter(lambda x: bool("extra_id" in x), additional_special_tokens))) @@ -257,7 +257,7 @@ class T5Tokenizer(PreTrainedTokenizer): if index < self.sp_model.get_piece_size(): token = 
self.sp_model.IdToPiece(index) else: - token = "".format(self.vocab_size - 1 - index) + token = f"" return token def convert_tokens_to_string(self, tokens): @@ -276,7 +276,7 @@ class T5Tokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/t5/tokenization_t5_fast.py b/src/transformers/models/t5/tokenization_t5_fast.py index 10986695df..7486f7a05f 100644 --- a/src/transformers/models/t5/tokenization_t5_fast.py +++ b/src/transformers/models/t5/tokenization_t5_fast.py @@ -115,7 +115,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast): ): # Add extra_ids to the special token list if extra_ids > 0 and additional_special_tokens is None: - additional_special_tokens = ["".format(i) for i in range(extra_ids)] + additional_special_tokens = [f"" for i in range(extra_ids)] elif extra_ids > 0 and additional_special_tokens is not None: # Check that we have the right number of extra special tokens extra_tokens = len(set(filter(lambda x: bool("extra_id_" in x), additional_special_tokens))) @@ -141,7 +141,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git 
a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py index 63beedea3e..db2f2558b5 100644 --- a/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py @@ -82,20 +82,20 @@ def convert_tf_checkpoint_to_pytorch( elif task == "INTERMEDIATE_PRETRAINING": model = TapasModel(config=config) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") # Load weights from tf checkpoint load_tf_weights_in_tapas(model, config, tf_checkpoint_path) # Save pytorch-model (weights and configuration) - print("Save PyTorch model to {}".format(pytorch_dump_path)) + print(f"Save PyTorch model to {pytorch_dump_path}") model.save_pretrained(pytorch_dump_path[:-17]) # Save tokenizer files dir_name = r"C:\Users\niels.rogge\Documents\Python projecten\tensorflow\Tensorflow models\SQA\Base\tapas_sqa_inter_masklm_base_reset" tokenizer = TapasTokenizer(vocab_file=dir_name + r"\vocab.txt", model_max_length=512) - print("Save tokenizer files to {}".format(pytorch_dump_path)) + print(f"Save tokenizer files to {pytorch_dump_path}") tokenizer.save_pretrained(pytorch_dump_path[:-17]) print("Used relative position embeddings:", model.config.reset_position_index_per_cell) diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index cecdd7b4e1..fd1d08145c 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -142,13 +142,13 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path): ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # 
Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -169,19 +169,19 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path): ] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue # in case the model is TapasForSequenceClassification, we skip output_bias and output_weights # since these are not used for classification if isinstance(model, TapasForSequenceClassification): if any(n in ["output_bias", "output_weights"] for n in name): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue # in case the model is TapasModel, we skip output_bias, output_weights, output_bias_cls and output_weights_cls # since this model does not have MLM and NSP heads if isinstance(model, TapasModel): if any(n in ["output_bias", "output_weights", "output_bias_cls", "output_weights_cls"] for n in name): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue # if first scope name starts with "bert", change it to "tapas" if name[0] == "bert": @@ -223,7 +223,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path): try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -241,7 +241,7 @@ def load_tf_weights_in_tapas(model, config, tf_checkpoint_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") # Added a 
check to see whether the array is a scalar (because bias terms in Tapas checkpoints can be # scalar => should first be converted to numpy arrays) if np.isscalar(array): diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py index 6fe7737cc5..9716193951 100644 --- a/src/transformers/models/tapas/tokenization_tapas.py +++ b/src/transformers/models/tapas/tokenization_tapas.py @@ -324,8 +324,8 @@ class TapasTokenizer(PreTrainedTokenizer): if not os.path.isfile(vocab_file): raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file) + f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`" ) self.vocab = load_vocab(vocab_file) self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()]) @@ -1208,9 +1208,9 @@ class TapasTokenizer(PreTrainedTokenizer): if max_length is None and len(encoded_inputs["input_ids"]) > self.model_max_length and verbose: if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False): logger.warning( - "Token indices sequence length is longer than the specified maximum sequence length " - "for this model ({} > {}). Running this sequence through the model will result in " - "indexing errors".format(len(encoded_inputs["input_ids"]), self.model_max_length) + f"Token indices sequence length is longer than the specified maximum sequence length " + f"for this model ({len(encoded_inputs['input_ids'])} > {self.model_max_length}). Running this " + "sequence through the model will result in indexing errors." 
) self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True @@ -1670,7 +1670,7 @@ class TapasTokenizer(PreTrainedTokenizer): def _find_tokens(self, text, segment): """Return start index of segment in text or None.""" - logging.info("text: %s %s", text, segment) + logging.info(f"text: {text} {segment}") for index in range(1 + len(text) - len(segment)): for seg_index, seg_token in enumerate(segment): if text[index + seg_index].piece != seg_token.piece: @@ -1685,7 +1685,7 @@ class TapasTokenizer(PreTrainedTokenizer): answer_text, ): """Returns all occurrences of answer_text in the table.""" - logging.info("answer text: %s", answer_text) + logging.info(f"answer text: {answer_text}") for row_index, row in enumerate(tokenized_table.rows): if row_index == 0: # We don't search for answers in the header. @@ -2347,7 +2347,7 @@ _INF = float("INF") def _get_numeric_value_from_date(date, mask): """Converts date (datetime Python object) to a NumericValue object with a Date object value.""" if date.year < _MIN_YEAR or date.year > _MAX_YEAR: - raise ValueError("Invalid year: %d" % date.year) + raise ValueError(f"Invalid year: {date.year}") new_date = Date() if mask.year: @@ -2523,7 +2523,7 @@ def _get_value_type(numeric_value): return NUMBER_TYPE elif numeric_value.date is not None: return DATE_TYPE - raise ValueError("Unknown type: %s" % numeric_value) + raise ValueError(f"Unknown type: {numeric_value}") def _get_value_as_primitive_value(numeric_value): @@ -2541,7 +2541,7 @@ def _get_value_as_primitive_value(numeric_value): if date.day is not None: value_tuple[2] = float(date.day) return tuple(value_tuple) - raise ValueError("Unknown type: %s" % numeric_value) + raise ValueError(f"Unknown type: {numeric_value}") def _get_all_types(numeric_values): @@ -2567,7 +2567,7 @@ def get_numeric_sort_key_fn(numeric_values): """ value_types = _get_all_types(numeric_values) if len(value_types) != 1: - raise ValueError("No common value type in %s" % 
numeric_values) + raise ValueError(f"No common value type in {numeric_values}") value_type = next(iter(value_types)) if value_type == NUMBER_TYPE: @@ -2586,7 +2586,7 @@ def get_numeric_sort_key_fn(numeric_values): valid_indexes.discard(tuple_index) if not valid_indexes: - raise ValueError("No common value in %s" % numeric_values) + raise ValueError(f"No common value in {numeric_values}") def _sort_key_fn(numeric_value): value = _get_value_as_primitive_value(numeric_value) @@ -2618,8 +2618,7 @@ def _consolidate_numeric_values(row_index_to_values, min_consolidation_fraction, return {} max_count = max(type_counts.values()) if max_count < len(row_index_to_values) * min_consolidation_fraction: - # logging.log_every_n(logging.INFO, 'Can\'t consolidate types: %s %s %d', 100, - # debug_info, row_index_to_values, max_count) + # logging.log_every_n(logging.INFO, f'Can\'t consolidate types: {debug_info} {row_index_to_values} {max_count}', 100) return {} valid_types = set() @@ -2708,15 +2707,13 @@ def filter_invalid_unicode_from_table(table): cell, is_invalid = filter_invalid_unicode(cell) if is_invalid: logging.warning( - "Scrub an invalid table body @ table_id: %s, row_index: %d, " "col_index: %d", - table.table_id, - row_index, - col_index, + f"Scrub an invalid table body @ table_id: {table.table_id}, row_index: {row_index}, " + f"col_index: {col_index}", ) for col_index, column in enumerate(table.columns): column, is_invalid = filter_invalid_unicode(column) if is_invalid: - logging.warning("Scrub an invalid table header @ table_id: %s, col_index: %d", table.table_id, col_index) + logging.warning(f"Scrub an invalid table header @ table_id: {table.table_id}, col_index: {col_index}") def add_numeric_table_values(table, min_consolidation_fraction=0.7, debug_info=None): diff --git a/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py index 
26355455f8..db040a31a8 100755 --- a/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py @@ -48,14 +48,14 @@ def convert_transfo_xl_checkpoint_to_pytorch( corpus = pickle.load(fp, encoding="latin1") # Save vocabulary and dataset cache as Dictionaries (should be better than pickles for the long-term) pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["pretrained_vocab_file"] - print("Save vocabulary to {}".format(pytorch_vocab_dump_path)) + print(f"Save vocabulary to {pytorch_vocab_dump_path}") corpus_vocab_dict = corpus.vocab.__dict__ torch.save(corpus_vocab_dict, pytorch_vocab_dump_path) corpus_dict_no_vocab = corpus.__dict__ corpus_dict_no_vocab.pop("vocab", None) pytorch_dataset_dump_path = pytorch_dump_folder_path + "/" + CORPUS_NAME - print("Save dataset to {}".format(pytorch_dataset_dump_path)) + print(f"Save dataset to {pytorch_dataset_dump_path}") torch.save(corpus_dict_no_vocab, pytorch_dataset_dump_path) if tf_checkpoint_path: @@ -63,22 +63,22 @@ def convert_transfo_xl_checkpoint_to_pytorch( config_path = os.path.abspath(transfo_xl_config_file) tf_path = os.path.abspath(tf_checkpoint_path) - print("Converting Transformer XL checkpoint from {} with config at {}".format(tf_path, config_path)) + print(f"Converting Transformer XL checkpoint from {tf_path} with config at {config_path}.") # Initialise PyTorch model if transfo_xl_config_file == "": config = TransfoXLConfig() else: config = TransfoXLConfig.from_json_file(transfo_xl_config_file) - print("Building PyTorch model from configuration: {}".format(str(config))) + print(f"Building PyTorch model from configuration: {config}") model = TransfoXLLMHeadModel(config) model = load_tf_weights_in_transfo_xl(model, config, tf_path) # Save pytorch-model pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) pytorch_config_dump_path 
= os.path.join(pytorch_dump_folder_path, CONFIG_NAME) - print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) + print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}") torch.save(model.state_dict(), pytorch_weights_dump_path) - print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) + print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(config.to_json_string()) diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 31d3aae482..c0701f7ea6 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -368,7 +368,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer): r_idx - l_idx, d_emb_i, init_std, - name="emb_layers_._{}".format(i), + name=f"emb_layers_._{i}", ) ) @@ -380,7 +380,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer): shape=(d_emb_i, self.d_proj), initializer=get_initializer(self.init_std), trainable=True, - name="emb_projs_._{}".format(i), + name=f"emb_projs_._{i}", ) ) @@ -467,7 +467,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): layer_norm_epsilon=config.layer_norm_epsilon, init_std=config.init_std, output_attentions=self.output_attentions, - name="layers_._{}".format(i), + name=f"layers_._{i}", ) ) else: # learnable embeddings and absolute embeddings diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl_utilities.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl_utilities.py index 9797a8fa66..699e278583 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl_utilities.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl_utilities.py @@ -59,25 +59,22 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): shape=(self.d_embed, 
self.d_proj), initializer="zeros", trainable=True, - name="out_projs_._{}".format(i), + name=f"out_projs_._{i}", ) self.out_projs.append(weight) else: self.out_projs.append(None) weight = self.add_weight( - shape=( - self.vocab_size, - self.d_embed, - ), + shape=(self.vocab_size, self.d_embed), initializer="zeros", trainable=True, - name="out_layers_._{}_._weight".format(i), + name=f"out_layers_._{i}_._weight", ) bias = self.add_weight( shape=(self.vocab_size,), initializer="zeros", trainable=True, - name="out_layers_._{}_._bias".format(i), + name=f"out_layers_._{i}_._bias", ) self.out_layers.append((weight, bias)) else: @@ -86,23 +83,20 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer): d_emb_i = self.d_embed // (self.div_val ** i) weight = self.add_weight( - shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name="out_projs_._{}".format(i) + shape=(d_emb_i, self.d_proj), initializer="zeros", trainable=True, name=f"out_projs_._{i}" ) self.out_projs.append(weight) weight = self.add_weight( - shape=( - r_idx - l_idx, - d_emb_i, - ), + shape=(r_idx - l_idx, d_emb_i), initializer="zeros", trainable=True, - name="out_layers_._{}_._weight".format(i), + name=f"out_layers_._{i}_._weight", ) bias = self.add_weight( shape=(r_idx - l_idx,), initializer="zeros", trainable=True, - name="out_layers_._{}_._bias".format(i), + name=f"out_layers_._{i}_._bias", ) self.out_layers.append((weight, bias)) super().build(input_shape) diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_transfo_xl.py index bab4af8b3f..b036cf71d8 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py @@ -67,7 +67,7 @@ def build_tf_to_pytorch_map(model, config): for i, (out_l, proj_l, tie_proj) in enumerate( zip(model.crit.out_layers, model.crit.out_projs, config.tie_projs) ): - layer_str = "transformer/adaptive_softmax/cutoff_%d/" % i + layer_str 
= f"transformer/adaptive_softmax/cutoff_{i}/" if config.tie_word_embeddings: tf_to_pt_map.update({layer_str + "b": out_l.bias}) else: @@ -81,12 +81,12 @@ def build_tf_to_pytorch_map(model, config): # Embeddings for i, (embed_l, proj_l) in enumerate(zip(model.word_emb.emb_layers, model.word_emb.emb_projs)): - layer_str = "transformer/adaptive_embed/cutoff_%d/" % i + layer_str = f"transformer/adaptive_embed/cutoff_{i}/" tf_to_pt_map.update({layer_str + "lookup_table": embed_l.weight, layer_str + "proj_W": proj_l}) # Transformer blocks for i, b in enumerate(model.layers): - layer_str = "transformer/layer_%d/" % i + layer_str = f"transformer/layer_{i}/" tf_to_pt_map.update( { layer_str + "rel_attn/LayerNorm/gamma": b.dec_attn.layer_norm.weight, @@ -135,7 +135,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path): init_vars = tf.train.list_variables(tf_path) tf_weights = {} for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) tf_weights[name] = array @@ -156,7 +156,7 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path): except AssertionError as e: e.args += (p_i.shape, arr_i.shape) raise - logger.info("Initialize PyTorch weight {} for layer {}".format(name, i)) + logger.info(f"Initialize PyTorch weight {name} for layer {i}") p_i.data = torch.from_numpy(arr_i) else: try: @@ -166,13 +166,13 @@ def load_tf_weights_in_transfo_xl(model, config, tf_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) tf_weights.pop(name, None) tf_weights.pop(name + "/Adam", None) tf_weights.pop(name + "/Adam_1", None) - logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys()))) + logger.info(f"Weights not copied to 
PyTorch model: {', '.join(tf_weights.keys())}") return model diff --git a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py index b4d4fc80e1..9b185ecdd1 100644 --- a/src/transformers/models/transfo_xl/tokenization_transfo_xl.py +++ b/src/transformers/models/transfo_xl/tokenization_transfo_xl.py @@ -198,7 +198,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer): self.vocab_file = vocab_file self.never_split = never_split self.punctuation_symbols = '!"#$%&()*+,-./\\:;<=>?@[\\]^_`{|}~' - self.punction_without_space_before_pattern = re.compile(r"[^\s][{}]".format(self.punctuation_symbols)) + self.punction_without_space_before_pattern = re.compile(rf"[^\s][{self.punctuation_symbols}]") self.punctuation_with_space_around_pattern = self._compile_space_around_punctuation_pattern() self.language = language self.moses_punct_normalizer = sm.MosesPunctNormalizer(language) @@ -235,9 +235,9 @@ class TransfoXLTokenizer(PreTrainedTokenizer): except Exception as e: raise ValueError( - "Unable to parse file {}. Unknown format. " + f"Unable to parse file {pretrained_vocab_file}. Unknown format. " "If you tried to load a model saved through TransfoXLTokenizerFast," - "please note they are not compatible.".format(pretrained_vocab_file) + "please note they are not compatible." 
) from e if vocab_file is not None: @@ -248,20 +248,20 @@ class TransfoXLTokenizer(PreTrainedTokenizer): return self.lower_case def _compile_space_around_punctuation_pattern(self): - look_ahead_for_special_token = "(?=[{}])".format(self.punctuation_symbols) + look_ahead_for_special_token = f"(?=[{self.punctuation_symbols}])" look_ahead_to_match_all_except_space = r"(?=[^\s])" return re.compile(r"" + look_ahead_for_special_token + look_ahead_to_match_all_except_space) def count_file(self, path, verbose=False, add_eos=False): if verbose: - logger.info("counting file {} ...".format(path)) + logger.info(f"counting file {path} ...") assert os.path.exists(path), f"Input file {path} not found" sents = [] with open(path, "r", encoding="utf-8") as f: for idx, line in enumerate(f): if verbose and idx > 0 and idx % 500000 == 0: - logger.info(" line {}".format(idx)) + logger.info(f" line {idx}") symbols = self.tokenize(line, add_eos=add_eos) self.counter.update(symbols) sents.append(symbols) @@ -273,10 +273,10 @@ class TransfoXLTokenizer(PreTrainedTokenizer): sents : a list of sentences, each a list of tokenized symbols """ if verbose: - logger.info("counting {} sents ...".format(len(sents))) + logger.info(f"counting {len(sents)} sents ...") for idx, symbols in enumerate(sents): if verbose and idx > 0 and idx % 500000 == 0: - logger.info(" line {}".format(idx)) + logger.info(f" line {idx}") self.counter.update(symbols) def _build_from_file(self, vocab_file): @@ -308,11 +308,11 @@ class TransfoXLTokenizer(PreTrainedTokenizer): def build_vocab(self): if self.vocab_file: - logger.info("building vocab from {}".format(self.vocab_file)) + logger.info(f"building vocab from {self.vocab_file}") self._build_from_file(self.vocab_file) - logger.info("final vocab size {}".format(len(self))) + logger.info(f"final vocab size {len(self)}") else: - logger.info("building vocab with min_freq={}, max_size={}".format(self.min_freq, self.max_size)) + logger.info(f"building vocab with 
min_freq={self.min_freq}, max_size={self.max_size}") self.idx2sym = [] self.sym2idx = OrderedDict() @@ -324,18 +324,18 @@ class TransfoXLTokenizer(PreTrainedTokenizer): break self.add_symbol(sym) - logger.info("final vocab size {} from {} unique tokens".format(len(self), len(self.counter))) + logger.info(f"final vocab size {len(self)} from {len(self.counter)} unique tokens") @torch_only_method def encode_file(self, path, ordered=False, verbose=False, add_eos=True, add_double_eos=False): if verbose: - logger.info("encoding file {} ...".format(path)) + logger.info(f"encoding file {path} ...") assert os.path.exists(path), f"Output file {path} not found" encoded = [] with open(path, "r", encoding="utf-8") as f: for idx, line in enumerate(f): if verbose and idx > 0 and idx % 500000 == 0: - logger.info(" line {}".format(idx)) + logger.info(f" line {idx}") symbols = self.tokenize(line, add_eos=add_eos, add_double_eos=add_double_eos) encoded.append(self.convert_to_tensor(symbols)) @@ -347,11 +347,11 @@ class TransfoXLTokenizer(PreTrainedTokenizer): @torch_only_method def encode_sents(self, sents, ordered=False, verbose=False): if verbose: - logger.info("encoding {} sents ...".format(len(sents))) + logger.info(f"encoding {len(sents)} sents ...") encoded = [] for idx, symbols in enumerate(sents): if verbose and idx > 0 and idx % 500000 == 0: - logger.info(" line {}".format(idx)) + logger.info(f" line {idx}") encoded.append(self.convert_to_tensor(symbols)) if ordered: @@ -363,7 +363,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer): if sym not in self.sym2idx: self.idx2sym.append(sym) self.sym2idx[sym] = len(self.idx2sym) - 1 - setattr(self, "{}_idx".format(sym.strip("<>")), self.sym2idx[sym]) + setattr(self, f"{sym.strip('<>')}_idx", self.sym2idx[sym]) def add_symbol(self, sym): if sym not in self.sym2idx: @@ -430,7 +430,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer): def _convert_id_to_token(self, idx): """Converts an id in a token (BPE) using the vocab.""" - assert 0 
<= idx < len(self), "Index {} out of vocabulary range".format(idx) + assert 0 <= idx < len(self), f"Index {idx} out of vocabulary range" return self.idx2sym[idx] def _convert_token_to_id(self, sym): @@ -438,7 +438,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer): if sym in self.sym2idx: return self.sym2idx[sym] else: - # logger.info('encounter unk {}'.format(sym)) + # logger.info(f'encounter unk {sym}') # assert '' not in sym if hasattr(self, "unk_idx"): return self.sym2idx.get(sym, self.unk_idx) @@ -675,20 +675,16 @@ class TransfoXLCorpus(object): resolved_corpus_file = cached_path(corpus_file, cache_dir=cache_dir) except EnvironmentError: logger.error( - "Corpus '{}' was not found in corpus list ({}). " - "We assumed '{}' was a path or url but couldn't find files {} " - "at this path or url.".format( - pretrained_model_name_or_path, - ", ".join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys()), - pretrained_model_name_or_path, - corpus_file, - ) + f"Corpus '{pretrained_model_name_or_path}' was not found in corpus list " + f"({', '.join(PRETRAINED_CORPUS_ARCHIVE_MAP.keys())}. " + f"We assumed '{pretrained_model_name_or_path}' was a path or url but couldn't find files {corpus_file} " + "at this path or url." ) return None if resolved_corpus_file == corpus_file: - logger.info("loading corpus file {}".format(corpus_file)) + logger.info(f"loading corpus file {corpus_file}") else: - logger.info("loading corpus file {} from cache at {}".format(corpus_file, resolved_corpus_file)) + logger.info(f"loading corpus file {corpus_file} from cache at {resolved_corpus_file}") # Instantiate tokenizer. 
corpus = cls(*inputs, **kwargs) @@ -777,7 +773,7 @@ def get_lm_corpus(datadir, dataset): with open(fn, "rb") as fp: corpus = pickle.load(fp) else: - logger.info("Producing dataset {}...".format(dataset)) + logger.info(f"Producing dataset {dataset}...") kwargs = {} if dataset in ["wt103", "wt2"]: kwargs["special"] = [""] diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index bbc8180918..841a7b317f 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -260,7 +260,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -514,7 +514,7 @@ class Wav2Vec2Tokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py index 82e5e24d34..99c837765c 100755 --- a/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +++ 
b/src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py @@ -54,14 +54,14 @@ def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_folder_p pytorch_config_dump_path = pytorch_dump_folder_path + "/" + CONFIG_NAME pytorch_vocab_dump_path = pytorch_dump_folder_path + "/" + VOCAB_FILES_NAMES["vocab_file"] - print("Save PyTorch model to {}".format(pytorch_weights_dump_path)) + print(f"Save PyTorch model to {pytorch_weights_dump_path}") torch.save(two_levels_state_dict, pytorch_weights_dump_path) - print("Save configuration file to {}".format(pytorch_config_dump_path)) + print(f"Save configuration file to {pytorch_config_dump_path}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(json.dumps(config, indent=2) + "\n") - print("Save vocab file to {}".format(pytorch_config_dump_path)) + print(f"Save vocab file to {pytorch_config_dump_path}") with open(pytorch_vocab_dump_path, "w", encoding="utf-8") as f: f.write(json.dumps(vocab, indent=2) + "\n") diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index fb1ba012e7..f2989ffa56 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -146,7 +146,7 @@ class TFXLMMultiHeadAttention(tf.keras.layers.Layer): else: klen = shape_list(kv)[1] - # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim) + # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured' dim_per_head = self.dim // self.n_heads mask_reshape = (bs, 1, qlen, klen) if len(shape_list(mask)) == 3 else (bs, 1, 1, klen) @@ -289,19 +289,19 @@ class TFXLMMainLayer(tf.keras.layers.Layer): for i in range(self.n_layers): self.attentions.append( - TFXLMMultiHeadAttention(self.n_heads, self.dim, config=config, name="attentions_._{}".format(i)) + TFXLMMultiHeadAttention(self.n_heads, self.dim, config=config, 
name=f"attentions_._{i}") ) self.layer_norm1.append( - tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm1_._{}".format(i)) + tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm1_._{i}") ) # if self.is_decoder: # self.layer_norm15.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps)) # self.encoder_attn.append(MultiHeadAttention(self.n_heads, self.dim, dropout=self.attention_dropout)) self.ffns.append( - TFXLMTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name="ffns_._{}".format(i)) + TFXLMTransformerFFN(self.dim, self.hidden_dim, self.dim, config=config, name=f"ffns_._{i}") ) self.layer_norm2.append( - tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm2_._{}".format(i)) + tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name=f"layer_norm2_._{i}") ) if hasattr(config, "pruned_heads"): diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index 8867d32547..3ccd63ee97 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -153,7 +153,7 @@ class MultiHeadAttention(nn.Module): klen = qlen if cache is None else cache["slen"] + qlen else: klen = kv.size(1) - # assert dim == self.dim, 'Dimensions do not match: %s input vs %s configured' % (dim, self.dim) + # assert dim == self.dim, f'Dimensions do not match: {dim} input vs {self.dim} configured' n_heads = self.n_heads dim_per_head = self.dim // n_heads mask_reshape = (bs, 1, qlen, klen) if mask.dim() == 3 else (bs, 1, 1, klen) diff --git a/src/transformers/models/xlm/tokenization_xlm.py b/src/transformers/models/xlm/tokenization_xlm.py index 980e9c9637..d861ccc0ed 100644 --- a/src/transformers/models/xlm/tokenization_xlm.py +++ b/src/transformers/models/xlm/tokenization_xlm.py @@ -682,7 +682,7 @@ class XLMTokenizer(PreTrainedTokenizer): import Mykytea self.ja_word_tokenizer = 
Mykytea.Mykytea( - "-model %s/local/share/kytea/model.bin" % os.path.expanduser("~") + f"-model {os.path.expanduser('~')}/local/share/kytea/model.bin" ) except (AttributeError, ImportError): logger.error( @@ -954,7 +954,7 @@ class XLMTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] @@ -971,8 +971,8 @@ class XLMTokenizer(PreTrainedTokenizer): for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): if index != token_index: logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) + f"Saving vocabulary to {merge_file}: BPE merge indices are not consecutive." + " Please check that the tokenizer is not corrupted!" 
) index = token_index writer.write(" ".join(bpe_tokens) + "\n") diff --git a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py index 43a423b9ec..ba1d160ee2 100644 --- a/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py +++ b/src/transformers/models/xlm_prophetnet/tokenization_xlm_prophetnet.py @@ -153,7 +153,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer): self.fairseq_tokens_to_ids = {"[PAD]": 0, "[CLS]": 1, "[SEP]": 2, "[UNK]": 3, "[MASK]": 4} for i in range(10): - tok = "[unused{}]".format(i) + tok = f"[unused{i}]" self.fairseq_tokens_to_ids[tok] = 5 + i # The first "real" token "," has position 15 in the embedding vocab and position 3 in the spm vocab @@ -269,7 +269,7 @@ class XLMProphetNetTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py index 5d642ef431..4549d212ec 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py @@ -276,7 +276,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file 
= os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py index 0c68590439..9426d6c4aa 100644 --- a/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py +++ b/src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py @@ -230,7 +230,7 @@ class XLMRobertaTokenizerFast(PreTrainedTokenizerFast): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory.") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py index 0426b35c79..c2cabde0be 100755 --- a/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py +++ b/src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py @@ -55,7 +55,7 @@ def convert_xlnet_checkpoint_to_pytorch( finetuning_task = finetuning_task.lower() if finetuning_task is not None else "" if finetuning_task in GLUE_TASKS_NUM_LABELS: - print("Building PyTorch XLNetForSequenceClassification model from configuration: {}".format(str(config))) + print(f"Building PyTorch XLNetForSequenceClassification model from configuration: {config}") config.finetuning_task = finetuning_task config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task] model = XLNetForSequenceClassification(config) @@ -71,9 +71,9 @@ def convert_xlnet_checkpoint_to_pytorch( # Save pytorch-model pytorch_weights_dump_path 
= os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME) pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME) - print("Save PyTorch model to {}".format(os.path.abspath(pytorch_weights_dump_path))) + print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}") torch.save(model.state_dict(), pytorch_weights_dump_path) - print("Save configuration file to {}".format(os.path.abspath(pytorch_config_dump_path))) + print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}") with open(pytorch_config_dump_path, "w", encoding="utf-8") as f: f.write(config.to_json_string()) diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 21348d3be7..215a516561 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -69,8 +69,8 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer): if config.d_model % config.n_head != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.d_model, config.n_head) + f"The hidden size ({config.d_model}) is not a multiple of the number of attention " + f"heads ({config.n_head}" ) self.n_head = config.n_head @@ -455,7 +455,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): self.word_embedding = TFSharedEmbeddings( config.vocab_size, config.d_model, initializer_range=config.initializer_range, name="word_embedding" ) - self.layer = [TFXLNetLayer(config, name="layer_._{}".format(i)) for i in range(config.n_layer)] + self.layer = [TFXLNetLayer(config, name=f"layer_._{i}") for i in range(config.n_layer)] self.dropout = tf.keras.layers.Dropout(config.dropout) self.use_mems_eval = config.use_mems_eval @@ -550,7 +550,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): # beg, end = klen - 1, -1 beg, end = klen, -1 else: - raise ValueError("Unknown `attn_type` {}.".format(self.attn_type)) + raise 
ValueError(f"Unknown `attn_type` {self.attn_type}.") if self.bi_data: fwd_pos_seq = tf.range(beg, end, -1.0) @@ -662,7 +662,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): elif self.attn_type == "bi": attn_mask = None else: - raise ValueError("Unsupported attention type: {}".format(self.attn_type)) + raise ValueError(f"Unsupported attention type: {self.attn_type}") # data mask: input mask & perm mask assert inputs["input_mask"] is None or inputs["attention_mask"] is None, ( diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index d60462ad0f..9d5813d21c 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -77,10 +77,10 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None): if ( hasattr(model, "logits_proj") and config.finetuning_task is not None - and "model/regression_{}/logit/kernel".format(config.finetuning_task) in tf_weights + and f"model/regression_{config.finetuning_task}/logit/kernel" in tf_weights ): - tf_to_pt_map["model/regression_{}/logit/kernel".format(config.finetuning_task)] = model.logits_proj.weight - tf_to_pt_map["model/regression_{}/logit/bias".format(config.finetuning_task)] = model.logits_proj.bias + tf_to_pt_map[f"model/regression_{config.finetuning_task}/logit/kernel"] = model.logits_proj.weight + tf_to_pt_map[f"model/regression_{config.finetuning_task}/logit/bias"] = model.logits_proj.bias # Now load the rest of the transformer model = model.transformer @@ -95,7 +95,7 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None): # Transformer blocks for i, b in enumerate(model.layer): - layer_str = "model/transformer/layer_%d/" % i + layer_str = f"model/transformer/layer_{i}/" tf_to_pt_map.update( { layer_str + "rel_attn/LayerNorm/gamma": b.rel_attn.layer_norm.weight, @@ -156,7 +156,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path): init_vars = tf.train.list_variables(tf_path) 
tf_weights = {} for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) tf_weights[name] = array @@ -164,9 +164,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path): tf_to_pt_map = build_tf_xlnet_to_pytorch_map(model, config, tf_weights) for name, pointer in tf_to_pt_map.items(): - logger.info("Importing {}".format(name)) + logger.info(f"Importing {name}") if name not in tf_weights: - logger.info("{} not in tf pre-trained weights, skipping".format(name)) + logger.info(f"{name} not in tf pre-trained weights, skipping") continue array = tf_weights[name] # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v @@ -188,7 +188,7 @@ def load_tf_weights_in_xlnet(model, config, tf_path): except AssertionError as e: e.args += (p_i.shape, arr_i.shape) raise - logger.info("Initialize PyTorch weight {} for layer {}".format(name, i)) + logger.info(f"Initialize PyTorch weight {name} for layer {i}") p_i.data = torch.from_numpy(arr_i) else: try: @@ -198,13 +198,13 @@ def load_tf_weights_in_xlnet(model, config, tf_path): except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) tf_weights.pop(name, None) tf_weights.pop(name + "/Adam", None) tf_weights.pop(name + "/Adam_1", None) - logger.info("Weights not copied to PyTorch model: {}".format(", ".join(tf_weights.keys()))) + logger.info(f"Weights not copied to PyTorch model: {', '.join(tf_weights.keys())}") return model @@ -214,8 +214,8 @@ class XLNetRelativeAttention(nn.Module): if config.d_model % config.n_head != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.d_model, config.n_head) + f"The hidden size 
({config.d_model}) is not a multiple of the number of attention " + f"heads ({config.n_head}" ) self.n_head = config.n_head @@ -1041,7 +1041,7 @@ class XLNetModel(XLNetPreTrainedModel): # beg, end = klen - 1, -1 beg, end = klen, -1 else: - raise ValueError("Unknown `attn_type` {}.".format(self.attn_type)) + raise ValueError(f"Unknown `attn_type` {self.attn_type}.") if self.bi_data: fwd_pos_seq = torch.arange(beg, end, -1.0, dtype=torch.float) @@ -1145,7 +1145,7 @@ class XLNetModel(XLNetPreTrainedModel): elif self.attn_type == "bi": attn_mask = None else: - raise ValueError("Unsupported attention type: {}".format(self.attn_type)) + raise ValueError(f"Unsupported attention type: {self.attn_type}") # data mask: input mask & perm mask assert input_mask is None or attention_mask is None, "You can only use one of input_mask (uses 1 for padding) " diff --git a/src/transformers/models/xlnet/tokenization_xlnet.py b/src/transformers/models/xlnet/tokenization_xlnet.py index 054fbf7c4f..4980f450cb 100644 --- a/src/transformers/models/xlnet/tokenization_xlnet.py +++ b/src/transformers/models/xlnet/tokenization_xlnet.py @@ -314,7 +314,7 @@ class XLNetTokenizer(PreTrainedTokenizer): def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/models/xlnet/tokenization_xlnet_fast.py b/src/transformers/models/xlnet/tokenization_xlnet_fast.py index e2ebd0cfbb..f3a46c2d78 100644 --- a/src/transformers/models/xlnet/tokenization_xlnet_fast.py +++ b/src/transformers/models/xlnet/tokenization_xlnet_fast.py @@ -254,7 +254,7 @@ class XLNetTokenizerFast(PreTrainedTokenizerFast): def 
save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + logger.error(f"Vocabulary path ({save_directory}) should be a directory") return out_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index e9fee7fda4..5dd5ee0cb9 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -296,13 +296,13 @@ class AdamW(Optimizer): correct_bias: bool = True, ): if lr < 0.0: - raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr)) + raise ValueError(f"Invalid learning rate: {lr} - should be >= 0.0") if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[0])) + raise ValueError(f"Invalid beta parameter: {betas[0]} - should be in [0.0, 1.0[") if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter: {} - should be in [0.0, 1.0[".format(betas[1])) + raise ValueError(f"Invalid beta parameter: {betas[1]} - should be in [0.0, 1.0[") if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps)) + raise ValueError(f"Invalid epsilon value: {eps} - should be >= 0.0") defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias) super().__init__(params, defaults) diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py index d3bb551aeb..77cd0d1c60 100644 --- a/src/transformers/optimization_tf.py +++ b/src/transformers/optimization_tf.py @@ -333,7 +333,7 @@ class GradientAccumulator(object): ] ) if len(gradients) != len(self._gradients): - raise ValueError("Expected %s gradients, but got %d" % (len(self._gradients), len(gradients))) + raise 
ValueError(f"Expected {len(self._gradients)} gradients, but got {len(gradients)}") for accum_gradient, gradient in zip(self._gradients, gradients): if accum_gradient is not None and gradient is not None: diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 0e4d4a754d..638ac6ecef 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -231,10 +231,10 @@ def check_task(task: str) -> Tuple[Dict, Any]: if len(tokens) == 4 and tokens[0] == "translation" and tokens[2] == "to": targeted_task = SUPPORTED_TASKS["translation"] return targeted_task, (tokens[1], tokens[3]) - raise KeyError("Invalid translation task {}, use 'translation_XX_to_YY' format".format(task)) + raise KeyError(f"Invalid translation task {task}, use 'translation_XX_to_YY' format") raise KeyError( - "Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys()) + ["translation_XX_to_YY"]) + f"Unknown task {task}, available tasks are {list(SUPPORTED_TASKS.keys()) + ['translation_XX_to_YY']}" ) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 9f582db4b8..9da13796f5 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -159,7 +159,7 @@ def get_default_model(targeted_task: Dict, framework: Optional[str], task_option defaults = targeted_task["default"] if task_options: if task_options not in defaults: - raise ValueError("The task does not provide any default models for options {}".format(task_options)) + raise ValueError(f"The task does not provide any default models for options {task_options}") default_models = defaults[task_options]["model"] elif "model" in defaults: default_models = targeted_task["default"]["model"] @@ -240,11 +240,11 @@ class PipelineDataFormat: if output_path is not None and not overwrite: if exists(abspath(self.output_path)): - raise OSError("{} already exists on disk".format(self.output_path)) + 
raise OSError(f"{self.output_path} already exists on disk") if input_path is not None: if not exists(abspath(self.input_path)): - raise OSError("{} doesnt exist on disk".format(self.input_path)) + raise OSError(f"{self.input_path} doesnt exist on disk") @abstractmethod def __iter__(self): @@ -313,7 +313,7 @@ class PipelineDataFormat: elif format == "pipe": return PipedPipelineDataFormat(output_path, input_path, column, overwrite=overwrite) else: - raise KeyError("Unknown reader {} (Available reader are json/csv/pipe)".format(format)) + raise KeyError(f"Unknown reader {format} (Available reader are json/csv/pipe)") class CsvPipelineDataFormat(PipelineDataFormat): @@ -537,7 +537,7 @@ class Pipeline(_ScikitCompat): self.tokenizer = tokenizer self.modelcard = modelcard self.framework = framework - self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else "cuda:{}".format(device)) + self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else f"cuda:{device}") self.binary_output = binary_output # Special handling @@ -558,7 +558,7 @@ class Pipeline(_ScikitCompat): A path to the directory where to saved. It will be created if it doesn't exist. """ if os.path.isfile(save_directory): - logger.error("Provided path ({}) should be a directory, not a file".format(save_directory)) + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") return os.makedirs(save_directory, exist_ok=True) @@ -596,7 +596,7 @@ class Pipeline(_ScikitCompat): output = pipe(...) 
""" if self.framework == "tf": - with tf.device("/CPU:0" if self.device == -1 else "/device:GPU:{}".format(self.device)): + with tf.device("/CPU:0" if self.device == -1 else f"/device:GPU:{self.device}"): yield else: if self.device.type == "cuda": diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 127abdfed0..c77d2141c9 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -94,15 +94,14 @@ class Conversation: if self.new_user_input: if overwrite: logger.warning( - 'User input added while unprocessed input was existing: "{}" was overwritten with: "{}".'.format( - self.new_user_input, text - ) + f'User input added while unprocessed input was existing: "{self.new_user_input}" was overwritten ' + f'with: "{text}".' ) self.new_user_input = text else: logger.warning( - 'User input added while unprocessed input was existing: "{}" new input ignored: "{}". ' - "Set `overwrite` to True to overwrite unprocessed user input".format(self.new_user_input, text) + f'User input added while unprocessed input was existing: "{self.new_user_input}" new input ' + f'ignored: "{text}". Set `overwrite` to True to overwrite unprocessed user input' ) else: self.new_user_input = text @@ -148,10 +147,10 @@ class Conversation: Example: Conversation id: 7d15686b-dc94-49f2-9c4b-c9eac6a1f114 user >> Going to the movies tonight - any suggestions? 
bot >> The Big Lebowski """ - output = "Conversation id: {} \n".format(self.uuid) + output = f"Conversation id: {self.uuid} \n" for is_user, text in self.iter_texts(): name = "user" if is_user else "bot" - output += "{} >> {} \n".format(name, text) + output += f"{name} >> {text} \n" return output @@ -232,10 +231,8 @@ class ConversationalPipeline(Pipeline): ), "ConversationalPipeline expects a Conversation or list of Conversations as an input" if conversation.new_user_input is None: raise ValueError( - "Conversation with UUID {} does not contain new user input to process. " - "Add user inputs with the conversation's `add_user_input` method".format( - type(conversation.uuid) - ) + f"Conversation with UUID {type(conversation.uuid)} does not contain new user input to process. " + "Add user inputs with the conversation's `add_user_input` method" ) assert ( self.tokenizer.pad_token_id is not None or self.tokenizer.eos_token_id is not None diff --git a/src/transformers/pipelines/fill_mask.py b/src/transformers/pipelines/fill_mask.py index 251c7f0973..86ce54b3e9 100644 --- a/src/transformers/pipelines/fill_mask.py +++ b/src/transformers/pipelines/fill_mask.py @@ -129,9 +129,8 @@ class FillMaskPipeline(Pipeline): target_enc = self.tokenizer.tokenize(target) if len(target_enc) > 1 or target_enc[0] == self.tokenizer.unk_token: logger.warning( - "The specified target token `{}` does not exist in the model vocabulary. Replacing with `{}`.".format( - target, target_enc[0] - ) + f"The specified target token `{target}` does not exist in the model vocabulary. " + f"Replacing with `{target_enc[0]}`." 
) targets_proc.append(target_enc[0]) target_inds = np.array(self.tokenizer.convert_tokens_to_ids(targets_proc)) diff --git a/src/transformers/pipelines/question_answering.py b/src/transformers/pipelines/question_answering.py index d0b16a8cab..0008f78c58 100644 --- a/src/transformers/pipelines/question_answering.py +++ b/src/transformers/pipelines/question_answering.py @@ -42,12 +42,12 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler): if k not in item: raise KeyError("You need to provide a dictionary with keys {question:..., context:...}") elif item[k] is None: - raise ValueError("`{}` cannot be None".format(k)) + raise ValueError(f"`{k}` cannot be None") elif isinstance(item[k], str) and len(item[k]) == 0: - raise ValueError("`{}` cannot be empty".format(k)) + raise ValueError(f"`{k}` cannot be empty") return QuestionAnsweringPipeline.create_sample(**item) - raise ValueError("{} argument needs to be of type (SquadExample, dict)".format(item)) + raise ValueError(f"{item} argument needs to be of type (SquadExample, dict)") def __call__(self, *args, **kwargs): # Detect where the actual inputs are @@ -77,7 +77,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler): else: raise ValueError("Arguments can't be understood") else: - raise ValueError("Unknown arguments {}".format(kwargs)) + raise ValueError(f"Unknown arguments {kwargs}") # Normalize inputs if isinstance(inputs, dict): @@ -86,7 +86,7 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler): # Copy to avoid overriding arguments inputs = [i for i in inputs] else: - raise ValueError("Invalid arguments {}".format(inputs)) + raise ValueError(f"Invalid arguments {kwargs}") for i, item in enumerate(inputs): inputs[i] = self.normalize(item) @@ -210,10 +210,10 @@ class QuestionAnsweringPipeline(Pipeline): kwargs.setdefault("handle_impossible_answer", False) if kwargs["topk"] < 1: - raise ValueError("topk parameter should be >= 1 (got {})".format(kwargs["topk"])) + raise ValueError(f"topk parameter 
should be >= 1 (got {kwargs['topk']})") if kwargs["max_answer_len"] < 1: - raise ValueError("max_answer_len parameter should be >= 1 (got {})".format(kwargs["max_answer_len"])) + raise ValueError(f"max_answer_len parameter should be >= 1 (got {(kwargs['max_answer_len'])}") # Convert inputs to features examples = self._args_parser(*args, **kwargs) diff --git a/src/transformers/pipelines/text2text_generation.py b/src/transformers/pipelines/text2text_generation.py index 3fb7d00c6e..bda4457ea8 100644 --- a/src/transformers/pipelines/text2text_generation.py +++ b/src/transformers/pipelines/text2text_generation.py @@ -101,9 +101,7 @@ class Text2TextGenerationPipeline(Pipeline): padding = False else: raise ValueError( - " `args[0]`: {} have the wrong format. The should be either of type `str` or type `list`".format( - args[0] - ) + f" `args[0]`: {args[0]} have the wrong format. The should be either of type `str` or type `list`" ) with self.device_placement(): @@ -198,16 +196,14 @@ class SummarizationPipeline(Text2TextGenerationPipeline): """ if input_length < min_length // 2: logger.warning( - "Your min_length is set to {}, but you input_length is only {}. You might consider decreasing min_length manually, e.g. summarizer('...', min_length=10)".format( - min_length, input_length - ) + f"Your min_length is set to {min_length}, but you input_length is only {input_length}. You might " + "consider decreasing min_length manually, e.g. summarizer('...', min_length=10)" ) if input_length < max_length: logger.warning( - "Your max_length is set to {}, but you input_length is only {}. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=50)".format( - max_length, input_length - ) + f"Your max_length is set to {max_length}, but you input_length is only {input_length}. You might " + "consider decreasing max_length manually, e.g. 
summarizer('...', max_length=50)" ) @@ -234,9 +230,8 @@ class TranslationPipeline(Text2TextGenerationPipeline): def check_inputs(self, input_length: int, min_length: int, max_length: int): if input_length > 0.9 * max_length: logger.warning( - "Your input_length: {} is bigger than 0.9 * max_length: {}. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)".format( - input_length, max_length - ) + f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider " + "increasing your max_length manually, e.g. translator('...', max_length=400)" ) def __call__(self, *args, **kwargs): diff --git a/src/transformers/sagemaker/trainer_sm.py b/src/transformers/sagemaker/trainer_sm.py index f11b52f8f7..1ea9a8f40b 100644 --- a/src/transformers/sagemaker/trainer_sm.py +++ b/src/transformers/sagemaker/trainer_sm.py @@ -176,7 +176,7 @@ class SageMakerTrainer(Trainer): return output_dir = output_dir if output_dir is not None else self.args.output_dir os.makedirs(output_dir, exist_ok=True) - logger.info("Saving model checkpoint to %s", output_dir) + logger.info(f"Saving model checkpoint to {output_dir}") # Calling the state_dict needs to be done on the wrapped model state_dict = self.model_wrapped.state_dict() diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index c8e53d9611..3f1273a7c9 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -62,7 +62,7 @@ def parse_flag_from_env(key, default=False): _value = strtobool(value) except ValueError: # More values are supported, but let's keep the message simple. 
- raise ValueError("If set, {} must be yes or no.".format(key)) + raise ValueError(f"If set, {key} must be yes or no.") return _value @@ -75,7 +75,7 @@ def parse_int_from_env(key, default=None): try: _value = int(value) except ValueError: - raise ValueError("If set, {} must be a int.".format(key)) + raise ValueError(f"If set, {key} must be a int.") return _value diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 5ae55b80f2..b7048b2401 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -190,7 +190,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): ): tokens_to_add.append(token) if self.verbose: - logger.info("Adding %s to the vocabulary", token) + logger.info(f"Adding {token} to the vocabulary") added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(tokens_to_add)) added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 7d388d170b..449a88d24f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -685,7 +685,7 @@ class BatchEncoding(UserDict): # (mfuntowicz: This code is unreachable) # else: # raise ImportError( - # "Unable to convert output to tensors format {}".format(tensor_type) + # f"Unable to convert output to tensors format {tensor_type}" # ) # Do the tensor conversion in batch @@ -805,9 +805,7 @@ class SpecialTokensMixin: elif isinstance(value, (str, AddedToken)): setattr(self, key, value) else: - raise TypeError( - "special token {} has to be either str or AddedToken but got: {}".format(key, type(value)) - ) + raise TypeError(f"special token {key} has to be either str or AddedToken but got: {type(value)}") def sanitize_special_tokens(self) -> int: """ @@ -872,7 +870,7 @@ class SpecialTokensMixin: assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special 
token" if self.verbose: - logger.info("Assigning %s to the %s key of the tokenizer", value, key) + logger.info(f"Assigning {value} to the {key} key of the tokenizer") setattr(self, key, value) if key == "additional_special_tokens": @@ -1866,7 +1864,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): A tuple of :obj:`str`: The files saved. """ if os.path.isfile(save_directory): - logger.error("Provided path ({}) should be a directory, not a file".format(save_directory)) + logger.error(f"Provided path ({save_directory}) should be a directory, not a file") return os.makedirs(save_directory, exist_ok=True) @@ -3137,8 +3135,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): if not self.deprecation_warnings.get("sequence-length-is-longer-than-the-specified-maximum", False): logger.warning( "Token indices sequence length is longer than the specified maximum sequence length " - "for this model ({} > {}). Running this sequence through the model will result in " - "indexing errors".format(len(ids), self.model_max_length) + f"for this model ({len(ids)} > {self.model_max_length}). 
Running this sequence through the model " + "will result in indexing errors" ) self.deprecation_warnings["sequence-length-is-longer-than-the-specified-maximum"] = True diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 1f476585b0..901447d568 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -362,9 +362,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): ) -> BatchEncoding: if not isinstance(batch_text_or_text_pairs, list): - raise TypeError( - "batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs)) - ) + raise TypeError(f"batch_text_or_text_pairs has to be a list (got {type(batch_text_or_text_pairs)})") # Set the truncation and padding strategy and restore the initial configuration self.set_truncation_and_padding( diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index e8e157a8a6..27b1ed90fa 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -1567,7 +1567,7 @@ class Trainer: def _save_tpu(self, output_dir: Optional[str] = None): output_dir = output_dir if output_dir is not None else self.args.output_dir - logger.info("Saving model checkpoint to %s", output_dir) + logger.info(f"Saving model checkpoint to {output_dir}") if xm.is_master_ordinal(): os.makedirs(output_dir, exist_ok=True) @@ -1597,7 +1597,7 @@ class Trainer: # If we are executing this function, we are the process zero, so we don't check for that. output_dir = output_dir if output_dir is not None else self.args.output_dir os.makedirs(output_dir, exist_ok=True) - logger.info("Saving model checkpoint to %s", output_dir) + logger.info(f"Saving model checkpoint to {output_dir}") # Save a trained model and configuration using `save_pretrained()`. 
# They can then be reloaded using `from_pretrained()` if not isinstance(self.model, PreTrainedModel): @@ -1664,7 +1664,7 @@ class Trainer: number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit) checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete] for checkpoint in checkpoints_to_be_deleted: - logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint)) + logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit") shutil.rmtree(checkpoint) def evaluate( @@ -1814,9 +1814,9 @@ class Trainer: batch_size = dataloader.batch_size num_examples = self.num_examples(dataloader) - logger.info("***** Running %s *****", description) - logger.info(" Num examples = %d", num_examples) - logger.info(" Batch size = %d", batch_size) + logger.info(f"***** Running {description} *****") + logger.info(f" Num examples = {num_examples}") + logger.info(f" Batch size = {batch_size}") losses_host: torch.Tensor = None preds_host: Union[torch.Tensor, List[torch.Tensor]] = None labels_host: Union[torch.Tensor, List[torch.Tensor]] = None diff --git a/src/transformers/trainer_tf.py b/src/transformers/trainer_tf.py index 184845b85c..3638aac62d 100644 --- a/src/transformers/trainer_tf.py +++ b/src/transformers/trainer_tf.py @@ -303,11 +303,11 @@ class TFTrainer: prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only ) - logger.info("***** Running %s *****", description) - logger.info(" Num examples in dataset = %d", num_examples) + logger.info(f"***** Running {description} *****") + logger.info(f" Num examples in dataset = {num_examples}") if description == "Evaluation": - logger.info(" Num examples in used in evaluation = %d", self.args.eval_batch_size * steps) - logger.info(" Batch size = %d", self.args.eval_batch_size) + logger.info(f" Num examples in used in evaluation = {self.args.eval_batch_size * steps}") + logger.info(f" Batch 
size = {self.args.eval_batch_size}") label_ids: np.ndarray = None preds: np.ndarray = None @@ -504,7 +504,7 @@ class TFTrainer: if self.model.ckpt_manager.latest_checkpoint: logger.info( - "Checkpoint file %s found and restoring from checkpoint", self.model.ckpt_manager.latest_checkpoint + f"Checkpoint file {self.model.ckpt_manager.latest_checkpoint} found and restoring from checkpoint" ) ckpt.restore(self.model.ckpt_manager.latest_checkpoint).expect_partial() @@ -514,9 +514,9 @@ class TFTrainer: steps_trained_in_current_epoch = self.global_step % self.steps_per_epoch logger.info(" Continuing training from checkpoint, will skip to saved global_step") - logger.info(" Continuing training from epoch %d", epochs_trained) - logger.info(" Continuing training from global step %d", self.global_step) - logger.info(" Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch) + logger.info(f" Continuing training from epoch {epochs_trained}") + logger.info(f" Continuing training from global step {self.global_step}") + logger.info(f" Will skip the first {steps_trained_in_current_epoch} steps in the first epoch") tf.summary.experimental.set_step(self.global_step) @@ -526,16 +526,16 @@ class TFTrainer: self.tb_writer.flush() logger.info("***** Running training *****") - logger.info(" Num examples = %d", self.num_train_examples) + logger.info(f" Num examples = {self.num_train_examples}") # TODO: We might want to print a more precise ``epochs`` if self.args.max_steps > 0 ? - logger.info(" Num Epochs = %d", epochs) - logger.info(" Instantaneous batch size per device = %d", self.args.per_device_train_batch_size) + logger.info(f" Num Epochs = {epochs}") + logger.info(f" Instantaneous batch size per device = {self.args.per_device_train_batch_size}") logger.info( - " Total train batch size (w. parallel, distributed & accumulation) = %d", self.total_train_batch_size + f" Total train batch size (w. 
parallel, distributed & accumulation) = {self.total_train_batch_size}" ) - logger.info(" Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps) - logger.info(" Steps per epoch = %d", self.steps_per_epoch) - logger.info(" Total optimization steps = %d", t_total) + logger.info(f" Gradient Accumulation steps = {self.args.gradient_accumulation_steps}") + logger.info(f" Steps per epoch = {self.steps_per_epoch}") + logger.info(f" Total optimization steps = {t_total}") self.train_loss = tf.keras.metrics.Sum() start_time = datetime.datetime.now() @@ -592,7 +592,7 @@ class TFTrainer: if self.args.save_steps > 0 and self.global_step % self.args.save_steps == 0: ckpt_save_path = self.model.ckpt_manager.save() - logger.info("Saving checkpoint for step {} at {}".format(self.global_step, ckpt_save_path)) + logger.info(f"Saving checkpoint for step {self.global_step} at {ckpt_save_path}") if self.args.max_steps > 0 and self.global_step >= t_total: break @@ -607,7 +607,7 @@ class TFTrainer: end_time = datetime.datetime.now() - logger.info("Training took: {}".format(str(end_time - start_time))) + logger.info(f"Training took: {str(end_time - start_time)}") if self.args.past_index and hasattr(self, "_past"): # Clean the state at the end of training @@ -782,7 +782,7 @@ class TFTrainer: """ output_dir = output_dir if output_dir is not None else self.args.output_dir - logger.info("Saving model in {}".format(output_dir)) + logger.info(f"Saving model in {output_dir}") if not isinstance(self.model, TFPreTrainedModel): raise ValueError("Trainer.model appears to not be a PreTrainedModel") diff --git a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py index 33d87345b1..d40328b925 100755 --- a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py 
+++ b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py @@ -236,7 +236,7 @@ def main(): # Set the verbosity to info of the Transformers logger (on main process only): if is_main_process(training_args.local_rank): transformers.utils.logging.set_verbosity_info() - logger.info("Training/evaluation parameters %s", training_args) + logger.info(f"Training/evaluation parameters {training_args}") # Set seed before initializing model. set_seed(training_args.seed) diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index b1c042dac9..6b04672db1 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -357,7 +357,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs): super().__init__(**kwargs) - self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name="layer_._{}".format(i)) for i in range(config.num_hidden_layers)] + self.layer = [TF{{cookiecutter.camelcase_modelname}}Layer(config, name=f"layer_._{i}") for i in range(config.num_hidden_layers)] def call( self, diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py index 7b969055c2..e8e0d56a4d 100755 --- 
a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py @@ -78,13 +78,13 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch ) raise tf_path = os.path.abspath(tf_checkpoint_path) - logger.info("Converting TensorFlow checkpoint from {}".format(tf_path)) + logger.info(f"Converting TensorFlow checkpoint from {tf_path}") # Load weights from TF model init_vars = tf.train.list_variables(tf_path) names = [] arrays = [] for name, shape in init_vars: - logger.info("Loading TF weight {} with shape {}".format(name, shape)) + logger.info(f"Loading TF weight {name} with shape {shape}") array = tf.train.load_variable(tf_path, name) names.append(name) arrays.append(array) @@ -97,7 +97,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch n in ["adam_v", "adam_m", "AdamWeightDecayOptimizer", "AdamWeightDecayOptimizer_1", "global_step"] for n in name ): - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue pointer = model for m_name in name: @@ -117,7 +117,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch try: pointer = getattr(pointer, scope_names[0]) except AttributeError: - logger.info("Skipping {}".format("/".join(name))) + logger.info(f"Skipping {'/'.join(name)}") continue if len(scope_names) >= 2: num = int(scope_names[1]) @@ -133,7 +133,7 @@ def load_tf_weights_in_{{cookiecutter.lowercase_modelname}}(model, config, tf_ch except AssertionError as e: e.args += (pointer.shape, array.shape) raise - logger.info("Initialize PyTorch weight {}".format(name)) + logger.info(f"Initialize PyTorch weight {name}") pointer.data = torch.from_numpy(array) return model @@ -196,8 +196,8 @@ class 
{{cookiecutter.camelcase_modelname}}SelfAttention(nn.Module): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " + f"heads ({config.num_attention_heads})" ) self.num_attention_heads = config.num_attention_heads diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py index 90cdd1cc38..c352809f0a 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/test_modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -585,10 +585,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/sagemaker/scripts/tensorflow/run_tf.py b/tests/sagemaker/scripts/tensorflow/run_tf.py index 21716e996c..a47e76c09d 100644 --- a/tests/sagemaker/scripts/tensorflow/run_tf.py +++ b/tests/sagemaker/scripts/tensorflow/run_tf.py @@ -86,6 +86,6 @@ if __name__ == "__main__": end_train_time = time.time() - start_train_time logger.info("*** Train ***") - logger.info("train_runtime = %s", end_train_time) + logger.info(f"train_runtime = {end_train_time}") for key, value in train_results.history.items(): - logger.info(" %s = 
%s", key, value) + logger.info(f" {key} = {value}") diff --git a/tests/sagemaker/scripts/tensorflow/run_tf_dist.py b/tests/sagemaker/scripts/tensorflow/run_tf_dist.py index 7bfe76571a..0c1838ce9a 100644 --- a/tests/sagemaker/scripts/tensorflow/run_tf_dist.py +++ b/tests/sagemaker/scripts/tensorflow/run_tf_dist.py @@ -157,7 +157,7 @@ if __name__ == "__main__": ) end_train_time = time.time() - start_train_time logger.info("*** Train ***") - logger.info("train_runtime = %s", end_train_time) + logger.info(f"train_runtime = {end_train_time}") output_eval_file = os.path.join(args.output_dir, "train_results.txt") @@ -166,8 +166,8 @@ if __name__ == "__main__": logger.info("***** Train results *****") logger.info(train_results) for key, value in train_results.items(): - logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") # Evaluation if args.do_eval and (not SDP_ENABLED or sdp.rank() == 0): @@ -181,8 +181,8 @@ if __name__ == "__main__": logger.info("***** Eval results *****") logger.info(result) for key, value in result.items(): - logger.info(" %s = %s", key, value) - writer.write("%s = %s\n" % (key, value)) + logger.info(f" {key} = {value}") + writer.write(f"{key} = {value}\n") # Save result if SDP_ENABLED: diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 7815e0f8ab..3e42fa20d5 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -31,8 +31,8 @@ PASS = "__DUMMY_TRANSFORMERS_PASS__" ENDPOINT_STAGING = "https://moon-staging.huggingface.co" ENDPOINT_STAGING_BASIC_AUTH = f"https://{USER}:{PASS}@moon-staging.huggingface.co" -REPO_NAME = "my-model-{}".format(int(time.time())) -REPO_NAME_LARGE_FILE = "my-model-largefiles-{}".format(int(time.time())) +REPO_NAME = f"my-model-{int(time.time())}" +REPO_NAME_LARGE_FILE = f"my-model-largefiles-{int(time.time())}" WORKING_REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/working_repo") 
LARGE_FILE_14MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.epub" LARGE_FILE_18MB = "https://cdn-media.huggingface.co/lfs-largefiles/progit.pdf" @@ -95,7 +95,7 @@ class HfFolderTest(unittest.TestCase): Test the whole token save/get/delete workflow, with the desired behavior with respect to non-existent tokens. """ - token = "token-{}".format(int(time.time())) + token = f"token-{int(time.time())}" HfFolder.save_token(token) self.assertEqual(HfFolder.get_token(), token) HfFolder.delete_token() diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 96f5d505ad..402691dc98 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -172,7 +172,7 @@ class ModelTesterMixin: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], - msg="Parameter {} of model {} seems not properly initialized".format(name, model_class), + msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) def test_determinism(self): @@ -928,7 +928,7 @@ class ModelTesterMixin: model.base_model.save_pretrained(temp_dir_name) model, loading_info = model_class.from_pretrained(temp_dir_name, output_loading_info=True) - with self.subTest(msg="Missing keys for {}".format(model.__class__.__name__)): + with self.subTest(msg=f"Missing keys for {model.__class__.__name__}"): self.assertGreater(len(loading_info["missing_keys"]), 0) def test_tie_model_weights(self): diff --git a/tests/test_modeling_fsmt.py b/tests/test_modeling_fsmt.py index f4c7c8b5bc..708ef1dc94 100644 --- a/tests/test_modeling_fsmt.py +++ b/tests/test_modeling_fsmt.py @@ -365,10 +365,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_rag.py 
b/tests/test_modeling_rag.py index 6a31dcfa41..371542b4da 100644 --- a/tests/test_modeling_rag.py +++ b/tests/test_modeling_rag.py @@ -74,7 +74,7 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) + msg = f"{a} != {b}" if prefix: msg = prefix + ": " + msg raise AssertionError(msg) diff --git a/tests/test_modeling_tf_auto.py b/tests/test_modeling_tf_auto.py index 432ab15e52..ff80adc369 100644 --- a/tests/test_modeling_tf_auto.py +++ b/tests/test_modeling_tf_auto.py @@ -195,8 +195,6 @@ class TFAutoModelTest(unittest.TestCase): mapping = tuple(mapping.items()) for index, (child_config, child_model) in enumerate(mapping[1:]): for parent_config, parent_model in mapping[: index + 1]: - with self.subTest( - msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__) - ): + with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"): self.assertFalse(issubclass(child_config, parent_config)) self.assertFalse(issubclass(child_model, parent_model)) diff --git a/tests/test_modeling_tf_bart.py b/tests/test_modeling_tf_bart.py index 3aef4c03f9..33aad30be9 100644 --- a/tests/test_modeling_tf_bart.py +++ b/tests/test_modeling_tf_bart.py @@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_blenderbot.py b/tests/test_modeling_tf_blenderbot.py index aa672a970c..39e448f277 100644 --- a/tests/test_modeling_tf_blenderbot.py +++ b/tests/test_modeling_tf_blenderbot.py @@ -287,10 +287,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix 
+ ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_blenderbot_small.py b/tests/test_modeling_tf_blenderbot_small.py index 850fb3357b..fc49288abf 100644 --- a/tests/test_modeling_tf_blenderbot_small.py +++ b/tests/test_modeling_tf_blenderbot_small.py @@ -289,10 +289,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_led.py b/tests/test_modeling_tf_led.py index 29e1e1d6d5..a10ceb6f2d 100644 --- a/tests/test_modeling_tf_led.py +++ b/tests/test_modeling_tf_led.py @@ -380,10 +380,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_marian.py b/tests/test_modeling_tf_marian.py index 55175f9d66..ccea3b79cf 100644 --- a/tests/test_modeling_tf_marian.py +++ b/tests/test_modeling_tf_marian.py @@ -320,10 +320,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_mbart.py b/tests/test_modeling_tf_mbart.py index 228fe6a57b..502be625e7 100644 --- a/tests/test_modeling_tf_mbart.py +++ b/tests/test_modeling_tf_mbart.py @@ -291,10 +291,9 @@ def 
_assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_tf_pegasus.py b/tests/test_modeling_tf_pegasus.py index a812b90590..2be4556425 100644 --- a/tests/test_modeling_tf_pegasus.py +++ b/tests/test_modeling_tf_pegasus.py @@ -318,10 +318,9 @@ def _assert_tensors_equal(a, b, atol=1e-12, prefix=""): return True raise except Exception: - msg = "{} != {}".format(a, b) - if prefix: - msg = prefix + ": " + msg - raise AssertionError(msg) + if len(prefix) > 0: + prefix = f"{prefix}: " + raise AssertionError(f"{prefix}{a} != {b}") def _long_tensor(tok_lst): diff --git a/tests/test_modeling_wav2vec2.py b/tests/test_modeling_wav2vec2.py index 434526c749..abb57eb9af 100644 --- a/tests/test_modeling_wav2vec2.py +++ b/tests/test_modeling_wav2vec2.py @@ -320,13 +320,13 @@ class Wav2Vec2ModelTest(ModelTesterMixin, unittest.TestCase): if "conv.weight" in name or "masked_spec_embed" in name: self.assertTrue( -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0, - msg="Parameter {} of model {} seems not properly initialized".format(name, model_class), + msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) else: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], - msg="Parameter {} of model {} seems not properly initialized".format(name, model_class), + msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) @slow @@ -437,13 +437,13 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase): if "conv.weight" in name or "masked_spec_embed" in name: self.assertTrue( -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0, - msg="Parameter {} of model {} seems not properly initialized".format(name, 
model_class), + msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) else: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], - msg="Parameter {} of model {} seems not properly initialized".format(name, model_class), + msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) @slow diff --git a/tests/test_tokenization_auto.py b/tests/test_tokenization_auto.py index d632cbc558..64c3e72eff 100644 --- a/tests/test_tokenization_auto.py +++ b/tests/test_tokenization_auto.py @@ -101,9 +101,7 @@ class AutoTokenizerTest(unittest.TestCase): mapping = tuple(mapping.items()) for index, (child_config, _) in enumerate(mapping[1:]): for parent_config, _ in mapping[: index + 1]: - with self.subTest( - msg="Testing if {} is child of {}".format(child_config.__name__, parent_config.__name__) - ): + with self.subTest(msg=f"Testing if {child_config.__name__} is child of {parent_config.__name__}"): self.assertFalse(issubclass(child_config, parent_config)) @require_tokenizers diff --git a/tests/test_tokenization_bart.py b/tests/test_tokenization_bart.py index 1e5574e9dd..2a28957268 100644 --- a/tests/test_tokenization_bart.py +++ b/tests/test_tokenization_bart.py @@ -154,7 +154,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase): def test_embeded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) sentence = "A, AllenNLP sentence." 
diff --git a/tests/test_tokenization_bert.py b/tests/test_tokenization_bert.py index 837ef08c34..3b8dced0ab 100644 --- a/tests/test_tokenization_bert.py +++ b/tests/test_tokenization_bert.py @@ -250,7 +250,7 @@ class BertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_offsets_with_special_characters(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence." diff --git a/tests/test_tokenization_bertweet.py b/tests/test_tokenization_bertweet.py index 66de1ff6af..14d926e094 100644 --- a/tests/test_tokenization_bertweet.py +++ b/tests/test_tokenization_bertweet.py @@ -38,7 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase): self.merges_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["merges_file"]) with open(self.vocab_file, "w", encoding="utf-8") as fp: for token in vocab_tokens: - fp.write("{} {}".format(token, vocab_tokens[token]) + "\n") + fp.write(f"{token} {vocab_tokens[token]}\n") with open(self.merges_file, "w", encoding="utf-8") as fp: fp.write("\n".join(merges)) diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 995b56b00e..7aa1bbf443 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -1216,18 +1216,18 @@ class TokenizerTesterMixin: empty_tokens = tokenizer("", padding=True, pad_to_multiple_of=8) normal_tokens = tokenizer("This is a sample input", padding=True, pad_to_multiple_of=8) for key, value in empty_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") for key, value in 
normal_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") normal_tokens = tokenizer("This", pad_to_multiple_of=8) for key, value in normal_tokens.items(): - self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") # Should also work with truncation normal_tokens = tokenizer("This", padding=True, truncation=True, pad_to_multiple_of=8) for key, value in normal_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") # truncation to something which is not a multiple of pad_to_multiple_of raises an error self.assertRaises( @@ -1897,7 +1897,7 @@ class TokenizerTesterMixin: def test_is_fast(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -1907,7 +1907,7 @@ class TokenizerTesterMixin: def test_fast_only_inputs(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) # Ensure None raise an error @@ -1918,7 +1918,7 @@ class TokenizerTesterMixin: def test_alignement_methods(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} 
({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) words = ["Wonderful", "no", "inspiration", "example", "with", "subtoken"] @@ -2144,7 +2144,7 @@ class TokenizerTesterMixin: def test_tokenization_python_rust_equals(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2181,7 +2181,7 @@ class TokenizerTesterMixin: def test_num_special_tokens_to_add_equal(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2195,7 +2195,7 @@ class TokenizerTesterMixin: def test_max_length_equal(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2205,7 +2205,7 @@ class TokenizerTesterMixin: def test_special_tokens_map_equal(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with 
self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2217,7 +2217,7 @@ class TokenizerTesterMixin: def test_add_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) vocab_size = len(tokenizer_r) @@ -2239,7 +2239,7 @@ class TokenizerTesterMixin: def test_offsets_mapping(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) text = "Wonderful no inspiration example with subtoken" @@ -2285,9 +2285,7 @@ class TokenizerTesterMixin: for tokenizer, pretrained_name, kwargs in self.tokenizers_list: tokenizer = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) - with self.subTest( - "{} ({}, {})".format(tokenizer.__class__.__name__, pretrained_name, tokenizer.__class__.__name__) - ): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name}, {tokenizer.__class__.__name__})"): if is_torch_available(): returned_tensor = "pt" @@ -2341,7 +2339,7 @@ class TokenizerTesterMixin: def test_compare_pretokenized_inputs(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = 
self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2419,7 +2417,7 @@ class TokenizerTesterMixin: def test_create_token_type_ids(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) input_simple = [1, 2, 3] @@ -2437,7 +2435,7 @@ class TokenizerTesterMixin: def test_build_inputs_with_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) # # Input string @@ -2470,7 +2468,7 @@ class TokenizerTesterMixin: def test_padding(self, max_length=50): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2688,7 +2686,7 @@ class TokenizerTesterMixin: def test_padding_different_model_input_name(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = 
self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) self.assertEqual(tokenizer_p.pad_token_id, tokenizer_r.pad_token_id) @@ -2722,7 +2720,7 @@ class TokenizerTesterMixin: def test_save_pretrained(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) @@ -2747,7 +2745,7 @@ class TokenizerTesterMixin: def test_embeded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) sentence = "A, AllenNLP sentence." 
@@ -2772,7 +2770,7 @@ class TokenizerTesterMixin: def test_compare_add_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) simple_num_special_tokens_to_add = tokenizer_r.num_special_tokens_to_add(pair=False) @@ -2811,7 +2809,7 @@ class TokenizerTesterMixin: def test_compare_prepare_for_model(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) string_sequence = "Asserting that both tokenizers are equal" diff --git a/tests/test_tokenization_gpt2.py b/tests/test_tokenization_gpt2.py index ee669b4d24..8d70d8814e 100644 --- a/tests/test_tokenization_gpt2.py +++ b/tests/test_tokenization_gpt2.py @@ -133,7 +133,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_padding(self, max_length=15): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) # Simple input diff --git a/tests/test_tokenization_openai.py b/tests/test_tokenization_openai.py index 8df7c48c14..1a7568aa5a 100644 --- a/tests/test_tokenization_openai.py +++ b/tests/test_tokenization_openai.py @@ -87,7 +87,7 @@ class OpenAIGPTTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_padding(self, max_length=15): 
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) # Simple input diff --git a/tests/test_tokenization_phobert.py b/tests/test_tokenization_phobert.py index 3466a34b59..1f7e88deeb 100644 --- a/tests/test_tokenization_phobert.py +++ b/tests/test_tokenization_phobert.py @@ -39,7 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase): with open(self.vocab_file, "w", encoding="utf-8") as fp: for token in vocab_tokens: - fp.write("{} {}".format(token, vocab_tokens[token]) + "\n") + fp.write(f"{token} {vocab_tokens[token]}\n") with open(self.merges_file, "w", encoding="utf-8") as fp: fp.write("\n".join(merges)) diff --git a/tests/test_tokenization_reformer.py b/tests/test_tokenization_reformer.py index 9ceda2c0c6..179cf9bcd1 100644 --- a/tests/test_tokenization_reformer.py +++ b/tests/test_tokenization_reformer.py @@ -65,7 +65,7 @@ class ReformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_padding(self, max_length=15): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) # Simple input diff --git a/tests/test_tokenization_roberta.py b/tests/test_tokenization_roberta.py index af60b60db5..746c88d0f1 100644 --- a/tests/test_tokenization_roberta.py +++ b/tests/test_tokenization_roberta.py @@ -167,7 +167,7 @@ class RobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_embeded_special_tokens(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} 
({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) tokenizer_p = self.tokenizer_class.from_pretrained(pretrained_name, **kwargs) sentence = "A, AllenNLP sentence." diff --git a/tests/test_tokenization_tapas.py b/tests/test_tokenization_tapas.py index 81de386d85..357fa3773d 100644 --- a/tests/test_tokenization_tapas.py +++ b/tests/test_tokenization_tapas.py @@ -312,7 +312,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_offsets_with_special_characters(self): for tokenizer, pretrained_name, kwargs in self.tokenizers_list: - with self.subTest("{} ({})".format(tokenizer.__class__.__name__, pretrained_name)): + with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): tokenizer_r = self.rust_tokenizer_class.from_pretrained(pretrained_name, **kwargs) sentence = f"A, naïve {tokenizer_r.mask_token} AllenNLP sentence." 
@@ -807,18 +807,18 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): empty_tokens = tokenizer(table, padding=True, pad_to_multiple_of=8) normal_tokens = tokenizer(table, "This is a sample input", padding=True, pad_to_multiple_of=8) for key, value in empty_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") for key, value in normal_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") normal_tokens = tokenizer(table, "This", pad_to_multiple_of=8) for key, value in normal_tokens.items(): - self.assertNotEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertNotEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") # Should also work with truncation normal_tokens = tokenizer(table, "This", padding=True, truncation=True, pad_to_multiple_of=8) for key, value in normal_tokens.items(): - self.assertEqual(len(value) % 8, 0, "BatchEncoding.{} is not multiple of 8".format(key)) + self.assertEqual(len(value) % 8, 0, f"BatchEncoding.{key} is not multiple of 8") @unittest.skip("TAPAS cannot handle `prepare_for_model` without passing by `encode_plus` or `batch_encode_plus`") def test_prepare_for_model(self): diff --git a/tests/test_trainer_distributed.py b/tests/test_trainer_distributed.py index d892745968..4f455c7dae 100644 --- a/tests/test_trainer_distributed.py +++ b/tests/test_trainer_distributed.py @@ -82,11 +82,8 @@ if __name__ == "__main__": training_args = parser.parse_args_into_dataclasses()[0] logger.warning( - "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s", - training_args.local_rank, - training_args.device, - training_args.n_gpu, - training_args.local_rank != -1, + f"Process rank: 
{training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " + f"distributed training: {training_args.local_rank != -1}" ) # Essentially, what we want to verify in the distributed case is that we get all samples back, diff --git a/tests/test_trainer_tpu.py b/tests/test_trainer_tpu.py index c04a3e8189..20921a6f49 100644 --- a/tests/test_trainer_tpu.py +++ b/tests/test_trainer_tpu.py @@ -69,10 +69,8 @@ def main(): training_args = parser.parse_args_into_dataclasses()[0] logger.warning( - "Process rank: %s, device: %s, tpu_num_cores: %s", - training_args.local_rank, - training_args.device, - training_args.tpu_num_cores, + f"Process rank: {training_args.local_rank}, device: {training_args.device}, " + f"tpu_num_cores: {training_args.tpu_num_cores}", ) # Essentially, what we want to verify in the distributed case is diff --git a/utils/download_glue_data.py b/utils/download_glue_data.py index b46cbcd7b2..ab345c4e72 100644 --- a/utils/download_glue_data.py +++ b/utils/download_glue_data.py @@ -45,8 +45,8 @@ MRPC_TEST = "https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphras def download_and_extract(task, data_dir): - print("Downloading and extracting %s..." % task) - data_file = "%s.zip" % task + print(f"Downloading and extracting {task}...") + data_file = f"{task}.zip" urllib.request.urlretrieve(TASK2PATH[task], data_file) with zipfile.ZipFile(data_file) as zip_ref: zip_ref.extractall(data_dir) diff --git a/utils/link_tester.py b/utils/link_tester.py index 3400817c44..5eb6fed4d5 100644 --- a/utils/link_tester.py +++ b/utils/link_tester.py @@ -91,6 +91,6 @@ if __name__ == "__main__": if broken_links: print("The following links did not respond:") for link in broken_links: - print("- {}".format(link)) + print(f"- {link}") sys.exit(1) print("All links are ok.")