From dd52804f5fce0a568ffbb3dc7fd088d2de0a0e56 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Tue, 17 Nov 2020 15:11:29 -0500 Subject: [PATCH] Remove deprecated (#8604) * Remove old deprecated arguments Co-authored-by: LysandreJik * Remove needless imports * Fix tests Co-authored-by: LysandreJik --- examples/seq2seq/test_finetune_trainer.py | 4 +- examples/token-classification/run_ner_old.py | 4 +- src/transformers/data/processors/squad.py | 6 +- .../models/albert/modeling_albert.py | 22 ------ src/transformers/models/bart/modeling_bart.py | 40 ---------- src/transformers/models/bert/modeling_bert.py | 19 ----- src/transformers/models/ctrl/modeling_ctrl.py | 19 ----- .../models/distilbert/modeling_distilbert.py | 11 --- .../models/electra/modeling_electra.py | 11 --- src/transformers/models/fsmt/modeling_fsmt.py | 24 ------ src/transformers/models/gpt2/modeling_gpt2.py | 33 -------- .../models/gpt2/tokenization_gpt2.py | 8 -- .../models/gpt2/tokenization_gpt2_fast.py | 17 +---- .../models/longformer/modeling_longformer.py | 10 --- .../models/mobilebert/modeling_mobilebert.py | 7 -- .../models/openai/modeling_openai.py | 11 --- .../prophetnet/tokenization_prophetnet.py | 2 +- .../models/roberta/modeling_roberta.py | 9 --- .../models/roberta/tokenization_roberta.py | 8 -- src/transformers/models/t5/modeling_t5.py | 40 ---------- src/transformers/models/t5/modeling_tf_t5.py | 42 ---------- .../transfo_xl/configuration_transfo_xl.py | 10 --- .../transfo_xl/modeling_tf_transfo_xl.py | 8 -- .../models/transfo_xl/modeling_transfo_xl.py | 8 -- .../models/xlm/modeling_tf_xlm.py | 5 +- src/transformers/models/xlm/modeling_xlm.py | 7 +- src/transformers/pipelines.py | 11 +-- src/transformers/tokenization_utils.py | 21 ----- src/transformers/tokenization_utils_base.py | 21 ----- src/transformers/tokenization_utils_fast.py | 18 ----- src/transformers/trainer.py | 76 +------------------ src/transformers/trainer_tf.py | 37 --------- src/transformers/training_args.py | 11 --- ...on_{{cookiecutter.lowercase_modelname}}.py | 31 -------- tests/test_modeling_gpt2.py | 10 ++- tests/test_pipelines_fill_mask.py | 9 --- tests/test_tokenization_auto.py | 2 +- 37 files changed, 22 insertions(+), 610 deletions(-) diff --git a/examples/seq2seq/test_finetune_trainer.py b/examples/seq2seq/test_finetune_trainer.py index 399c1b6c04..70cceae3c5 100644 --- a/examples/seq2seq/test_finetune_trainer.py +++ b/examples/seq2seq/test_finetune_trainer.py @@ -138,7 +138,7 @@ class TestFinetuneTrainer(TestCasePlus): per_device_train_batch_size=batch_size, per_device_eval_batch_size=batch_size, predict_with_generate=True, - evaluate_during_training=True, + evaluation_strategy="steps", do_train=True, do_eval=True, warmup_steps=0, @@ -179,7 +179,7 @@ class TestFinetuneTrainer(TestCasePlus): --per_device_eval_batch_size 4 --learning_rate 3e-3 --warmup_steps 8 - --evaluate_during_training + --evaluation_strategy steps --predict_with_generate --logging_steps 0 --save_steps {str(eval_steps)} diff --git a/examples/token-classification/run_ner_old.py b/examples/token-classification/run_ner_old.py index e97048ecef..7b1c808062 100644 --- a/examples/token-classification/run_ner_old.py +++ b/examples/token-classification/run_ner_old.py @@ -254,7 +254,7 @@ def main(): trainer.save_model() # For convenience, we also re-save the tokenizer to the same directory, # so that you can share your model easily on huggingface.co/models =) - if trainer.is_world_master(): + if trainer.is_world_process_zero(): tokenizer.save_pretrained(training_args.output_dir) # Evaluation @@ -265,7 +265,7 @@ def main(): result = trainer.evaluate() output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt") - if trainer.is_world_master(): + if trainer.is_world_process_zero(): with open(output_eval_file, "w") as writer: logger.info("***** Eval results *****") for key, value in result.items(): diff --git a/src/transformers/data/processors/squad.py b/src/transformers/data/processors/squad.py index 991759a331..45e0a75f35 100644 --- a/src/transformers/data/processors/squad.py +++ b/src/transformers/data/processors/squad.py @@ -145,11 +145,11 @@ def squad_convert_example_to_features( # in the way they compute mask of added tokens. tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower() sequence_added_tokens = ( - tokenizer.max_len - tokenizer.max_len_single_sentence + 1 + tokenizer.model_max_length - tokenizer.max_len_single_sentence + 1 if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET - else tokenizer.max_len - tokenizer.max_len_single_sentence + else tokenizer.model_max_length - tokenizer.max_len_single_sentence ) - sequence_pair_added_tokens = tokenizer.max_len - tokenizer.max_len_sentences_pair + sequence_pair_added_tokens = tokenizer.model_max_length - tokenizer.max_len_sentences_pair span_doc_tokens = all_doc_tokens while len(spans) * doc_stride < len(all_doc_tokens): diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index f7b4702e2e..140c122bad 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -16,7 +16,6 @@ import math import os -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -742,7 +741,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): r""" labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`): @@ -753,8 +751,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel): Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair (see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A). - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. Returns: @@ -773,14 +769,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel): >>> sop_logits = outputs.sop_logits """ - - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.albert( @@ -898,23 +886,13 @@ class AlbertForMaskedLM(AlbertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.albert( diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index 9516e5bc8f..df0090c28e 100644 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -15,7 +15,6 @@ """PyTorch BART model, ported from the fairseq repo.""" import math import random -import warnings from typing import Dict, List, Optional, Tuple import numpy as np @@ -529,7 +528,6 @@ class BartDecoder(nn.Module): output_attentions=False, output_hidden_states=False, return_dict=True, - **unused, ): """ Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al., @@ -551,18 +549,6 @@ class BartDecoder(nn.Module): - hidden states - attentions """ - if "decoder_cached_states" in unused: - warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_cached_states") - if "decoder_past_key_values" in unused: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_past_key_values") # check attention mask and invert if encoder_padding_mask is not None: @@ -873,14 +859,7 @@ class BartModel(PretrainedBartModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): - if "decoder_past_key_values" in kwargs: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_values") if decoder_input_ids is None: use_cache = False @@ -1006,7 +985,6 @@ class BartForConditionalGeneration(PretrainedBartModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **unused, ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -1034,24 +1012,6 @@ class BartForConditionalGeneration(PretrainedBartModel): >>> tokenizer.decode(predictions).split() >>> # ['good', 'great', 'all', 'really', 'very'] """ - if "lm_labels" in unused: - warnings.warn( - "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = unused.pop("lm_labels") - if "decoder_cached_states" in unused: - warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_cached_states") - if "decoder_past_key_values" in unused: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_past_key_values") return_dict = return_dict if return_dict is not None else self.config.use_return_dict if labels is not None: diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 8a7aa1a4df..a6bdf64155 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -896,7 +896,6 @@ class BertForPreTraining(BertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`): @@ -928,13 +927,6 @@ class BertForPreTraining(BertPreTrainedModel): >>> prediction_logits = outputs.prediction_logits >>> seq_relationship_logits = outputs.seq_relationship_logits """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.bert( @@ -1136,24 +1128,13 @@ class BertForMaskedLM(BertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task." - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py index 4b9ae6debe..225560297e 100644 --- a/src/transformers/models/ctrl/modeling_ctrl.py +++ b/src/transformers/models/ctrl/modeling_ctrl.py @@ -15,9 +15,6 @@ # limitations under the License. """ PyTorch CTRL model.""" - -import warnings - import numpy as np import torch import torch.nn as nn @@ -369,15 +366,7 @@ class CTRLModel(CTRLPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): - if "past" in kwargs: - warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions use_cache = use_cache if use_cache is not None else self.config.use_cache @@ -542,7 +531,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -550,13 +538,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): ``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` """ - if "past" in kwargs: - warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict transformer_outputs = self.transformer( diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index 2b2fdb5252..df89a3bc1a 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -20,7 +20,6 @@ import copy import math -import warnings import numpy as np import torch @@ -526,23 +525,13 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict dlbrt_output = self.distilbert( diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 00dfa76dc1..3a4903cd26 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -16,7 +16,6 @@ import math import os -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -1000,23 +999,13 @@ class ElectraForMaskedLM(ElectraPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict generator_hidden_states = self.electra( diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index b4b42ebff2..56de8a716d 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -29,7 +29,6 @@ import math import random -import warnings from typing import Any, Dict, List, Optional, Tuple import torch @@ -618,7 +617,6 @@ class FSMTDecoder(nn.Module): output_attentions=False, output_hidden_states=False, return_dict=True, - **unused, ): """ Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al., @@ -640,19 +638,6 @@ class FSMTDecoder(nn.Module): - hidden states - attentions """ - if "decoder_cached_states" in unused: - warnings.warn( - "The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_cached_states") - if "decoder_past_key_values" in unused: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = unused.pop("decoder_past_key_values") - # check attention mask and invert if encoder_padding_mask is not None: encoder_padding_mask = invert_mask(encoder_padding_mask) @@ -933,15 +918,7 @@ class FSMTModel(PretrainedFSMTModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): - if "decoder_past_key_values" in kwargs: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_values") - if decoder_input_ids is None: use_cache = False @@ -1071,7 +1048,6 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **unused, ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index e4bb41ae6a..759c275b74 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -16,7 +16,6 @@ """PyTorch OpenAI GPT-2 model.""" import os -import warnings from dataclasses import dataclass from typing import List, Optional, Tuple @@ -528,16 +527,7 @@ class GPT2Model(GPT2PreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): - if "past" in kwargs: - warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states @@ -758,7 +748,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -766,13 +755,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): ``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` """ - if "past" in kwargs: - warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict transformer_outputs = self.transformer( @@ -900,8 +882,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): Labels for computing the multiple choice classification loss. Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above) - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. Return: @@ -930,19 +910,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): >>> mc_logits = outputs.mc_logits """ - if "lm_labels" in kwargs: - warnings.warn( - "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("lm_labels") - if "past" in kwargs: - warnings.warn( - "The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict transformer_outputs = self.transformer( diff --git a/src/transformers/models/gpt2/tokenization_gpt2.py b/src/transformers/models/gpt2/tokenization_gpt2.py index 6e9711c858..937e7a098a 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2.py +++ b/src/transformers/models/gpt2/tokenization_gpt2.py @@ -17,7 +17,6 @@ import json import os -import warnings from functools import lru_cache from typing import Optional, Tuple @@ -293,13 +292,6 @@ class GPT2Tokenizer(PreTrainedTokenizer): return vocab_file, merge_file def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs): - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space) if is_split_into_words or add_prefix_space: text = " " + text diff --git a/src/transformers/models/gpt2/tokenization_gpt2_fast.py b/src/transformers/models/gpt2/tokenization_gpt2_fast.py index bedfed2c9c..d8d957e175 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_fast.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_fast.py @@ -16,7 +16,6 @@ import json -import warnings from typing import Optional, Tuple from tokenizers import pre_tokenizers @@ -151,13 +150,6 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast): self.add_prefix_space = add_prefix_space def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding: - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - is_split_into_words = kwargs.get("is_split_into_words", False) assert self.add_prefix_space or not is_split_into_words, ( f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True " @@ -167,14 +159,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast): return super()._batch_encode_plus(*args, **kwargs) def _encode_plus(self, *args, **kwargs) -> BatchEncoding: - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - else: - is_split_into_words = kwargs.get("is_split_into_words", False) + is_split_into_words = kwargs.get("is_split_into_words", False) assert self.add_prefix_space or not is_split_into_words, ( f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True " diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index f9972a771e..27219f92f9 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -15,7 +15,6 @@ """PyTorch Longformer model. """ import math -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -1509,7 +1508,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -1538,14 +1536,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel): >>> loss = outputs.loss >>> prediction_logits = output.logits """ - - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.longformer( diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 131b085d55..73c6a2fbb4 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -1109,7 +1109,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -1119,12 +1118,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel): kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.mobilebert( diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index 3e30bdd773..18f0a1f687 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -19,7 +19,6 @@ import json import math import os -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -645,7 +644,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input): @@ -659,8 +657,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): Labels for computing the multiple choice classification loss. Indices should be in ``[0, ..., num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above) - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. Return: @@ -683,13 +679,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): >>> mc_logits = outputs.mc_logits """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict - if "lm_labels" in kwargs: - warnings.warn( - "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." transformer_outputs = self.transformer( input_ids, diff --git a/src/transformers/models/prophetnet/tokenization_prophetnet.py b/src/transformers/models/prophetnet/tokenization_prophetnet.py index 5f0c125e3d..5d93a00e85 100644 --- a/src/transformers/models/prophetnet/tokenization_prophetnet.py +++ b/src/transformers/models/prophetnet/tokenization_prophetnet.py @@ -302,7 +302,7 @@ class ProphetNetTokenizer(PreTrainedTokenizer): **kwargs, ) -> BatchEncoding: if max_length is None: - max_length = self.max_len + max_length = self.model_max_length model_inputs = self( src_texts, add_special_tokens=True, diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index ae9d6dd5b6..2b1c83dc99 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -16,7 +16,6 @@ """PyTorch RoBERTa model. """ import math -import warnings import torch import torch.nn as nn @@ -872,7 +871,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): @@ -882,13 +880,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel): kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): Used to hide legacy arguments that have been deprecated. """ - if "masked_lm_labels" in kwargs: - warnings.warn( - "The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("masked_lm_labels") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.roberta( diff --git a/src/transformers/models/roberta/tokenization_roberta.py b/src/transformers/models/roberta/tokenization_roberta.py index 91475defbe..0c6b985ad1 100644 --- a/src/transformers/models/roberta/tokenization_roberta.py +++ b/src/transformers/models/roberta/tokenization_roberta.py @@ -14,7 +14,6 @@ # limitations under the License. """Tokenization classes for RoBERTa.""" -import warnings from typing import List, Optional from ...tokenization_utils import AddedToken @@ -251,13 +250,6 @@ class RobertaTokenizer(GPT2Tokenizer): return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0] def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs): - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space) if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()): text = " " + text diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index bd0ac1c75d..915c9548c1 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -18,7 +18,6 @@ import copy import math import os -import warnings import torch import torch.nn.functional as F @@ -1048,7 +1047,6 @@ class T5Model(T5PreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): r""" Returns: @@ -1066,20 +1064,6 @@ class T5Model(T5PreTrainedModel): >>> last_hidden_states = outputs.last_hidden_state """ - if "decoder_past_key_value_states" in kwargs: - warnings.warn( - "The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_value_states") - if "decoder_past_key_values" in kwargs: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_values") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." - use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1198,15 +1182,12 @@ class T5ForConditionalGeneration(T5PreTrainedModel): output_attentions=None, output_hidden_states=None, return_dict=None, - **kwargs, ): r""" labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[-100, 0, ..., config.vocab_size - 1]`. All labels set to ``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]`` - kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): - Used to hide legacy arguments that have been deprecated. Returns: @@ -1226,27 +1207,6 @@ class T5ForConditionalGeneration(T5PreTrainedModel): >>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you ", return_tensors="pt").input_ids # Batch size 1 >>> outputs = model.generate(input_ids) """ - - if "lm_labels" in kwargs: - warnings.warn( - "The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.", - FutureWarning, - ) - labels = kwargs.pop("lm_labels") - if "decoder_past_key_value_states" in kwargs: - warnings.warn( - "The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_value_states") - if "decoder_past_key_values" in kwargs: - warnings.warn( - "The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("decoder_past_key_values") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." - use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 1f7a78f5bf..4d721a531d 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -595,7 +595,6 @@ class TFT5MainLayer(tf.keras.layers.Layer): output_attentions=None, output_hidden_states=None, training=False, - **kwargs, ) -> Tuple: if isinstance(inputs, (tuple, list)): input_ids = inputs[0] @@ -621,21 +620,8 @@ class TFT5MainLayer(tf.keras.layers.Layer): output_attentions = inputs.get("output_attentions", output_attentions) output_hidden_states = inputs.get("output_hidden_states", output_hidden_states) assert len(inputs) <= 10, "Too many inputs." - - if "past_key_values" in inputs: - warnings.warn( - "The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = inputs.pop("past_key_values") else: input_ids = inputs - if "past_key_values" in kwargs: - warnings.warn( - "The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past_key_values") output_attentions = output_attentions if output_attentions is not None else self.output_attentions output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states @@ -1078,23 +1064,9 @@ class TFT5Model(TFT5PreTrainedModel): output_attentions = inputs.get("output_attentions", output_attentions) output_hidden_states = inputs.get("output_hidden_states", output_hidden_states) assert len(inputs) <= 13, "Too many inputs." - - if "past_key_value_states" in inputs: - warnings.warn( - "The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = inputs.pop("past_key_value_states") else: input_ids = inputs - if "past_key_value_states" in kwargs: - warnings.warn( - "The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past_key_value_states") - use_cache = use_cache if use_cache is not None else self.config.use_cache output_attentions = output_attentions if output_attentions else self.config.output_attentions output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states @@ -1294,23 +1266,9 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling output_hidden_states = inputs.get("output_hidden_states", output_hidden_states) return_dict = inputs.get("return_dict", return_dict) assert len(inputs) <= 14, "Too many inputs." - - if "past_key_value_states" in inputs: - warnings.warn( - "The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = inputs.pop("past_key_value_states") else: input_ids = inputs - if "past_key_value_states" in kwargs: - warnings.warn( - "The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.", - FutureWarning, - ) - past_key_values = kwargs.pop("past_key_value_states") - use_cache = use_cache if use_cache is not None else self.config.use_cache output_attentions = output_attentions if output_attentions else self.config.output_attentions output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states diff --git a/src/transformers/models/transfo_xl/configuration_transfo_xl.py b/src/transformers/models/transfo_xl/configuration_transfo_xl.py index 3585a97b93..9885cbfa2e 100644 --- a/src/transformers/models/transfo_xl/configuration_transfo_xl.py +++ b/src/transformers/models/transfo_xl/configuration_transfo_xl.py @@ -15,9 +15,6 @@ # limitations under the License. """ Transformer XL configuration """ - -import warnings - from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -139,13 +136,6 @@ class TransfoXLConfig(PretrainedConfig): eos_token_id=0, **kwargs ): - if "tie_weight" in kwargs: - warnings.warn( - "The config parameter `tie_weight` is deprecated. Please use `tie_word_embeddings` instead.", - FutureWarning, - ) - kwargs["tie_word_embeddings"] = kwargs["tie_weight"] - super().__init__(eos_token_id=eos_token_id, **kwargs) self.vocab_size = vocab_size self.cutoffs = [] diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index dda6204356..c0d963ed1e 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -16,7 +16,6 @@ """ TF 2.0 Transformer XL model. """ -import warnings from dataclasses import dataclass from typing import List, Optional, Tuple @@ -865,13 +864,6 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): return self.crit.out_layers[-1] return None - def reset_length(self, tgt_len, ext_len, mem_len): - warnings.warn( - "The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.", - FutureWarning, - ) - self.transformer.reset_memory_length(mem_len) - def reset_memory_length(self, mem_len): self.transformer.reset_memory_length(mem_len) diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_transfo_xl.py index 8843febfe7..f231e5e0c7 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py @@ -17,7 +17,6 @@ PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py """ -import warnings from dataclasses import dataclass from typing import List, Optional, Tuple @@ -1010,13 +1009,6 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): else: self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i] - def reset_length(self, tgt_len, ext_len, mem_len): - warnings.warn( - "The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.", - FutureWarning, - ) - self.transformer.reset_memory_length(mem_len) - def reset_memory_length(self, mem_len): self.transformer.reset_memory_length(mem_len) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index 6f4cf368e4..2ad636b2ce 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -16,9 +16,7 @@ TF 2.0 XLM model. """ - import itertools -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -997,10 +995,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): ) if lengths is not None: - warnings.warn( + logger.warn( "The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the " "attention mask instead.", - FutureWarning, ) lengths = None diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index 94e303db14..a144d58c73 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -16,10 +16,8 @@ PyTorch XLM model. """ - import itertools import math -import warnings from dataclasses import dataclass from typing import Optional, Tuple @@ -1228,10 +1226,9 @@ class XLMForMultipleChoice(XLMPreTrainedModel): ) if lengths is not None: - warnings.warn( + logger.warn( "The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the " - "attention mask instead.", - FutureWarning, + "attention mask instead." ) lengths = None diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py index c6b60f902c..27ad1d4625 100755 --- a/src/transformers/pipelines.py +++ b/src/transformers/pipelines.py @@ -1182,7 +1182,6 @@ class FillMaskPipeline(Pipeline): device: int = -1, top_k=5, task: str = "", - **kwargs ): super().__init__( model=model, @@ -1196,15 +1195,7 @@ class FillMaskPipeline(Pipeline): ) self.check_model_type(TF_MODEL_WITH_LM_HEAD_MAPPING if self.framework == "tf" else MODEL_FOR_MASKED_LM_MAPPING) - - if "topk" in kwargs: - warnings.warn( - "The `topk` argument is deprecated and will be removed in a future version, use `top_k` instead.", - FutureWarning, - ) - self.top_k = kwargs.pop("topk") - else: - self.top_k = top_k + self.top_k = top_k def ensure_exactly_one_mask_token(self, masked_index: np.ndarray): numel = np.prod(masked_index.shape) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index c9f63eba3b..d6212ae0b6 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -19,7 +19,6 @@ import itertools import re import unicodedata -import warnings from typing import Any, Dict, List, Optional, Tuple, Union, overload from .file_utils import add_end_docstrings @@ -246,12 +245,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): Returns: :obj:`List[str]`: The list of tokens. """ - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - kwargs["is_split_into_words"] = kwargs.pop("is_pretokenized") # Simple mapping string => AddedToken for special tokens with specific tokenization behaviors all_special_tokens_extended = dict( (str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken) @@ -448,13 +441,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): "https://github.com/huggingface/transformers/pull/2674" ) - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - first_ids = get_input_ids(text) second_ids = get_input_ids(text_pair) if text_pair is not None else None @@ -530,13 +516,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): "transformers.PreTrainedTokenizerFast." ) - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - input_ids = [] for ids_or_pair_ids in batch_text_or_text_pairs: if not isinstance(ids_or_pair_ids, (list, tuple)): diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 25bd051f82..0fbad8b74a 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1532,18 +1532,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): super().__init__(**kwargs) - @property - def max_len(self) -> int: - """ - :obj:`int`: **Deprecated** Kept here for backward compatibility. Now renamed to :obj:`model_max_length` to - avoid ambiguity. - """ - warnings.warn( - "The `max_len` attribute has been deprecated and will be removed in a future version, use `model_max_length` instead.", - FutureWarning, - ) - return self.model_max_length - @property def max_len_single_sentence(self) -> int: """ @@ -2785,15 +2773,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): and ``convert_tokens_to_ids`` methods. """ - if "return_lengths" in kwargs: - if verbose: - warnings.warn( - "The PreTrainedTokenizerBase.prepare_for_model `return_lengths` parameter is deprecated. " - "Please use `return_length` instead.", - FutureWarning, - ) - return_length = kwargs["return_lengths"] - # Backward compatibility for 'truncation_strategy', 'pad_to_max_length' padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies( padding=padding, diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index c672a0b02e..3a5029c889 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -19,7 +19,6 @@ import json import os -import warnings from collections import defaultdict from typing import Any, Dict, List, Optional, Tuple, Union @@ -357,7 +356,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): return_offsets_mapping: bool = False, return_length: bool = False, verbose: bool = True, - **kwargs ) -> BatchEncoding: if not isinstance(batch_text_or_text_pairs, list): @@ -365,16 +363,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): "batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs)) ) - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - - if kwargs: - raise ValueError(f"Keyword arguments {kwargs} not recognized.") - # Set the truncation and padding strategy and restore the initial configuration self.set_truncation_and_padding( padding_strategy=padding_strategy, @@ -453,12 +441,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): verbose: bool = True, **kwargs ) -> BatchEncoding: - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") batched_input = [(text, text_pair)] if text_pair else [text] batched_output = self._batch_encode_plus( diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 9e31d085ba..72f8f7d985 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -213,8 +213,6 @@ class Trainer: containing the optimizer and the scheduler to use. Will default to an instance of :class:`~transformers.AdamW` on your model and a scheduler given by :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`. - kwargs: - Deprecated keyword arguments. """ def __init__( @@ -229,7 +227,6 @@ class Trainer: compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, callbacks: Optional[List[TrainerCallback]] = None, optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - **kwargs, ): if args is None: logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.") @@ -262,27 +259,6 @@ class Trainer: self.callback_handler = CallbackHandler(callbacks, self.model, self.optimizer, self.lr_scheduler) self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK) - # Deprecated arguments - if "tb_writer" in kwargs: - warnings.warn( - "Passing `tb_writer` as a keyword argument is deprecated and won't be possible in a " - + "future version. Use `TensorBoardCallback(tb_writer=...)` instead and pass it to the `callbacks`" - + "argument", - FutureWarning, - ) - tb_writer = kwargs.pop("tb_writer") - self.remove_callback(TensorBoardCallback) - self.add_callback(TensorBoardCallback(tb_writer=tb_writer)) - if "prediction_loss_only" in kwargs: - warnings.warn( - "Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a " - + "future version. Use `args.prediction_loss_only` instead. Setting " - + f"`args.prediction_loss_only={kwargs['prediction_loss_only']}", - FutureWarning, - ) - self.args.prediction_loss_only = kwargs.pop("prediction_loss_only") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." - # Will be set to True by `self._setup_loggers()` on first call to `self.log()`. self._loggers_initialized = False @@ -294,14 +270,7 @@ class Trainer: # We'll find a more elegant and not need to do this in the future. self.model.config.xla_device = True if not callable(self.data_collator) and callable(getattr(self.data_collator, "collate_batch", None)): - self.data_collator = self.data_collator.collate_batch - warnings.warn( - ( - "The `data_collator` should now be a simple callable (function, class with `__call__`), classes " - + "with a `collate_batch` are deprecated and won't be supported in a future version." - ), - FutureWarning, - ) + raise ValueError("The `data_collator` should be a simple callable (function, class with `__call__`).") if args.max_steps > 0: logger.info("max_steps is given, it will override any value given in num_train_epochs") @@ -1050,12 +1019,6 @@ class Trainer: logs (:obj:`Dict[str, float]`): The values to log. """ - if hasattr(self, "_log"): - warnings.warn( - "The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.", - FutureWarning, - ) - return self._log(logs) if self.state.epoch is not None: logs["epoch"] = self.state.epoch @@ -1095,12 +1058,6 @@ class Trainer: Return: :obj:`torch.Tensor`: The tensor with training loss on this batch. """ - if hasattr(self, "_training_step"): - warnings.warn( - "The `_training_step` method is deprecated and won't be called in a future version, define `training_step` in your subclass.", - FutureWarning, - ) - return self._training_step(model, inputs, self.optimizer) model.train() inputs = self._prepare_inputs(inputs) @@ -1140,18 +1097,6 @@ class Trainer: # We don't use .loss here since the model may return tuples instead of ModelOutput. return outputs[0] - def is_local_master(self) -> bool: - """ - Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several - machines) main process. - - .. warning:: - - This method is deprecated, use :meth:`~transformers.Trainer.is_local_process_zero` instead. - """ - warnings.warn("This method is deprecated, use `Trainer.is_local_process_zero()` instead.", FutureWarning) - return self.is_local_process_zero() - def is_local_process_zero(self) -> bool: """ Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several @@ -1162,18 +1107,6 @@ class Trainer: else: return self.args.local_rank in [-1, 0] - def is_world_master(self) -> bool: - """ - Whether or not this process is the global main process (when training in a distributed fashion on several - machines, this is only going to be :obj:`True` for one process). - - .. warning:: - - This method is deprecated, use :meth:`~transformers.Trainer.is_world_process_zero` instead. - """ - warnings.warn("This method is deprecated, use `Trainer.is_world_process_zero()` instead.", FutureWarning) - return self.is_world_process_zero() - def is_world_process_zero(self) -> bool: """ Whether or not this process is the global main process (when training in a distributed fashion on several @@ -1362,13 +1295,6 @@ class Trainer: Works both with or without labels. """ - if hasattr(self, "_prediction_loop"): - warnings.warn( - "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.", - FutureWarning, - ) - return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only) - if not isinstance(dataloader.dataset, collections.abc.Sized): raise ValueError("dataset must implement __len__") prediction_loss_only = ( diff --git a/src/transformers/trainer_tf.py b/src/transformers/trainer_tf.py index 64420e4f4a..6275ceafe5 100644 --- a/src/transformers/trainer_tf.py +++ b/src/transformers/trainer_tf.py @@ -3,7 +3,6 @@ import datetime import math import os -import warnings from typing import Callable, Dict, Optional, Tuple @@ -66,8 +65,6 @@ class TFTrainer: :class:`~transformers.AdamWeightDecay`. The scheduler will default to an instance of :class:`tf.keras.optimizers.schedules.PolynomialDecay` if :obj:`args.num_warmup_steps` is 0 else an instance of :class:`~transformers.WarmUp`. - kwargs: - Deprecated keyword arguments. """ def __init__( @@ -82,7 +79,6 @@ class TFTrainer: None, None, ), - **kwargs, ): assert parse(tf.__version__).release >= (2, 2, 0), ( "You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r " @@ -98,13 +94,6 @@ class TFTrainer: self.gradient_accumulator = GradientAccumulator() self.global_step = 0 self.epoch_logging = 0 - if "prediction_loss_only" in kwargs: - warnings.warn( - "Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a future version. Use `args.prediction_loss_only` instead.", - FutureWarning, - ) - self.args.prediction_loss_only = kwargs.pop("prediction_loss_only") - assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}." if tb_writer is not None: self.tb_writer = tb_writer @@ -249,12 +238,6 @@ class TFTrainer: WANDB_DISABLED: (Optional): boolean - defaults to false, set to "true" to disable wandb entirely. """ - if hasattr(self, "_setup_wandb"): - warnings.warn( - "The `_setup_wandb` method is deprecated and won't be called in a future version, define `setup_wandb` in your subclass.", - FutureWarning, - ) - return self._setup_wandb() logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"') combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()} @@ -304,14 +287,6 @@ class TFTrainer: Works both with or without labels. """ - if hasattr(self, "_prediction_loop"): - warnings.warn( - "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.", - FutureWarning, - ) - return self._prediction_loop( - dataset, steps, num_examples, description, prediction_loss_only=prediction_loss_only - ) prediction_loss_only = ( prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only @@ -393,12 +368,6 @@ class TFTrainer: logs (:obj:`Dict[str, float]`): The values to log. """ - if hasattr(self, "_log"): - warnings.warn( - "The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.", - FutureWarning, - ) - return self._log(logs) logs["epoch"] = self.epoch_logging if self.tb_writer: @@ -733,12 +702,6 @@ class TFTrainer: Returns: A tuple of two :obj:`tf.Tensor`: The loss and logits. """ - if hasattr(self, "_run_model"): - warnings.warn( - "The `_run_model` method is deprecated and won't be called in a future version, define `run_model` in your subclass.", - FutureWarning, - ) - return self._run_model(features, labels, training) if self.args.past_index >= 0 and getattr(self, "_past", None) is not None: features["mems"] = self._past diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 4761e649ee..d9650261ea 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -1,7 +1,6 @@ import dataclasses import json import os -import warnings from dataclasses import dataclass, field from enum import Enum from typing import Any, Dict, List, Optional, Tuple @@ -198,10 +197,6 @@ class TrainingArguments: do_train: bool = field(default=False, metadata={"help": "Whether to run training."}) do_eval: bool = field(default=None, metadata={"help": "Whether to run eval on the dev set."}) do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."}) - evaluate_during_training: bool = field( - default=False, - metadata={"help": "Run evaluation during training at each logging step."}, - ) evaluation_strategy: EvaluationStrategy = field( default="no", metadata={"help": "Run evaluation during training at each logging step."}, @@ -340,12 +335,6 @@ class TrainingArguments: def __post_init__(self): if self.disable_tqdm is None: self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN - if self.evaluate_during_training is True: - self.evaluation_strategy = EvaluationStrategy.STEPS - warnings.warn( - "The `evaluate_during_training` argument is deprecated in favor of `evaluation_strategy` (which has more options)", - FutureWarning, - ) self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy) if self.do_eval is False and self.evaluation_strategy != EvaluationStrategy.NO: self.do_eval = True diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/tokenization_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/tokenization_{{cookiecutter.lowercase_modelname}}.py index 8dcbb1e9b3..14e64cb854 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/tokenization_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/tokenization_{{cookiecutter.lowercase_modelname}}.py @@ -73,7 +73,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast): max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION {%- elif cookiecutter.tokenizer_type == "Standalone" %} -import warnings from typing import List, Optional from tokenizers import ByteLevelBPETokenizer @@ -234,13 +233,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer): return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0] def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs): - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space) if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()): text = " " + text @@ -285,29 +277,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast) ) self.add_prefix_space = add_prefix_space - def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding: - is_split_into_words = None - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.pop("is_pretokenized") - - is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words - return super()._batch_encode_plus(*args, **kwargs) - - def _encode_plus(self, *args, **kwargs) -> BatchEncoding: - is_split_into_words = None - if "is_pretokenized" in kwargs: - warnings.warn( - "`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.", - FutureWarning, - ) - is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words - return super()._encode_plus(*args, **kwargs) - - def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id] if token_ids_1 is None: diff --git a/tests/test_modeling_gpt2.py b/tests/test_modeling_gpt2.py index 3ed643f205..900a989a10 100644 --- a/tests/test_modeling_gpt2.py +++ b/tests/test_modeling_gpt2.py @@ -213,7 +213,9 @@ class GPT2ModelTester: next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1) output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"] - output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"] + output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[ + "last_hidden_state" + ] # select random slice random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() @@ -255,7 +257,7 @@ class GPT2ModelTester: # get two different outputs output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"] - output_from_past = model(next_tokens, past=past, attention_mask=attn_mask)["last_hidden_state"] + output_from_past = model(next_tokens, past_key_values=past, attention_mask=attn_mask)["last_hidden_state"] # select random slice random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() @@ -286,7 +288,9 @@ class GPT2ModelTester: next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1) output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"] - output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"] + output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[ + "last_hidden_state" + ] self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1]) # select random slice diff --git a/tests/test_pipelines_fill_mask.py b/tests/test_pipelines_fill_mask.py index 16404e8fd7..b1cc83ffae 100644 --- a/tests/test_pipelines_fill_mask.py +++ b/tests/test_pipelines_fill_mask.py @@ -1,7 +1,5 @@ import unittest -import pytest - from transformers import pipeline from transformers.testing_utils import require_tf, require_torch, slow @@ -53,13 +51,6 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): ] expected_check_keys = ["sequence"] - @require_torch - def test_torch_topk_deprecation(self): - # At pipeline initialization only it was not enabled at pipeline - # call site before - with pytest.warns(FutureWarning, match=r".*use `top_k`.*"): - pipeline(task="fill-mask", model=self.small_models[0], topk=1) - @require_torch def test_torch_fill_mask(self): valid_inputs = "My name is " diff --git a/tests/test_tokenization_auto.py b/tests/test_tokenization_auto.py index b090570e84..45f5635ab1 100644 --- a/tests/test_tokenization_auto.py +++ b/tests/test_tokenization_auto.py @@ -83,7 +83,7 @@ class AutoTokenizerTest(unittest.TestCase): else: self.assertEqual(tokenizer.do_lower_case, False) - self.assertEqual(tokenizer.max_len, 512) + self.assertEqual(tokenizer.model_max_length, 512) @require_tokenizers def test_tokenizer_identifier_non_existent(self):