Remove deprecated (#8604)
* Remove old deprecated arguments

Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>

* Remove needless imports

* Fix tests

Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
@@ -138,7 +138,7 @@ class TestFinetuneTrainer(TestCasePlus):
|
||||
per_device_train_batch_size=batch_size,
|
||||
per_device_eval_batch_size=batch_size,
|
||||
predict_with_generate=True,
|
||||
evaluate_during_training=True,
|
||||
evaluation_strategy="steps",
|
||||
do_train=True,
|
||||
do_eval=True,
|
||||
warmup_steps=0,
|
||||
@@ -179,7 +179,7 @@ class TestFinetuneTrainer(TestCasePlus):
|
||||
--per_device_eval_batch_size 4
|
||||
--learning_rate 3e-3
|
||||
--warmup_steps 8
|
||||
--evaluate_during_training
|
||||
--evaluation_strategy steps
|
||||
--predict_with_generate
|
||||
--logging_steps 0
|
||||
--save_steps {str(eval_steps)}
|
||||
|
||||
@@ -254,7 +254,7 @@ def main():
|
||||
trainer.save_model()
|
||||
# For convenience, we also re-save the tokenizer to the same directory,
|
||||
# so that you can share your model easily on huggingface.co/models =)
|
||||
if trainer.is_world_master():
|
||||
if trainer.is_world_process_zero():
|
||||
tokenizer.save_pretrained(training_args.output_dir)
|
||||
|
||||
# Evaluation
|
||||
@@ -265,7 +265,7 @@ def main():
|
||||
result = trainer.evaluate()
|
||||
|
||||
output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
|
||||
if trainer.is_world_master():
|
||||
if trainer.is_world_process_zero():
|
||||
with open(output_eval_file, "w") as writer:
|
||||
logger.info("***** Eval results *****")
|
||||
for key, value in result.items():
|
||||
|
||||
@@ -145,11 +145,11 @@ def squad_convert_example_to_features(
|
||||
# in the way they compute mask of added tokens.
|
||||
tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower()
|
||||
sequence_added_tokens = (
|
||||
tokenizer.max_len - tokenizer.max_len_single_sentence + 1
|
||||
tokenizer.model_max_length - tokenizer.max_len_single_sentence + 1
|
||||
if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET
|
||||
else tokenizer.max_len - tokenizer.max_len_single_sentence
|
||||
else tokenizer.model_max_length - tokenizer.max_len_single_sentence
|
||||
)
|
||||
sequence_pair_added_tokens = tokenizer.max_len - tokenizer.max_len_sentences_pair
|
||||
sequence_pair_added_tokens = tokenizer.model_max_length - tokenizer.max_len_sentences_pair
|
||||
|
||||
span_doc_tokens = all_doc_tokens
|
||||
while len(spans) * doc_stride < len(all_doc_tokens):
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -742,7 +741,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`):
|
||||
@@ -753,8 +751,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
||||
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
|
||||
(see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence
|
||||
A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A).
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -773,14 +769,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
||||
>>> sop_logits = outputs.sop_logits
|
||||
|
||||
"""
|
||||
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.albert(
|
||||
@@ -898,23 +886,13 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.albert(
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
"""PyTorch BART model, ported from the fairseq repo."""
|
||||
import math
|
||||
import random
|
||||
import warnings
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
@@ -529,7 +528,6 @@ class BartDecoder(nn.Module):
|
||||
output_attentions=False,
|
||||
output_hidden_states=False,
|
||||
return_dict=True,
|
||||
**unused,
|
||||
):
|
||||
"""
|
||||
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
||||
@@ -551,18 +549,6 @@ class BartDecoder(nn.Module):
|
||||
- hidden states
|
||||
- attentions
|
||||
"""
|
||||
if "decoder_cached_states" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_cached_states")
|
||||
if "decoder_past_key_values" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_past_key_values")
|
||||
|
||||
# check attention mask and invert
|
||||
if encoder_padding_mask is not None:
|
||||
@@ -873,14 +859,7 @@ class BartModel(PretrainedBartModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
if "decoder_past_key_values" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
||||
|
||||
if decoder_input_ids is None:
|
||||
use_cache = False
|
||||
@@ -1006,7 +985,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**unused,
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -1034,24 +1012,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
||||
>>> tokenizer.decode(predictions).split()
|
||||
>>> # ['good', 'great', 'all', 'really', 'very']
|
||||
"""
|
||||
if "lm_labels" in unused:
|
||||
warnings.warn(
|
||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = unused.pop("lm_labels")
|
||||
if "decoder_cached_states" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_cached_states")
|
||||
if "decoder_past_key_values" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_past_key_values")
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
if labels is not None:
|
||||
|
||||
@@ -896,7 +896,6 @@ class BertForPreTraining(BertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`):
|
||||
@@ -928,13 +927,6 @@ class BertForPreTraining(BertPreTrainedModel):
|
||||
>>> prediction_logits = outputs.prediction_logits
|
||||
>>> seq_relationship_logits = outputs.seq_relationship_logits
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.bert(
|
||||
@@ -1136,24 +1128,13 @@ class BertForMaskedLM(BertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task."
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
||||
@@ -15,9 +15,6 @@
|
||||
# limitations under the License.
|
||||
""" PyTorch CTRL model."""
|
||||
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
@@ -369,15 +366,7 @@ class CTRLModel(CTRLPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
if "past" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
@@ -542,7 +531,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -550,13 +538,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
||||
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
||||
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
||||
"""
|
||||
if "past" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
|
||||
import copy
|
||||
import math
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -526,23 +525,13 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
dlbrt_output = self.distilbert(
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -1000,23 +999,13 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
generator_hidden_states = self.electra(
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
|
||||
import math
|
||||
import random
|
||||
import warnings
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
@@ -618,7 +617,6 @@ class FSMTDecoder(nn.Module):
|
||||
output_attentions=False,
|
||||
output_hidden_states=False,
|
||||
return_dict=True,
|
||||
**unused,
|
||||
):
|
||||
"""
|
||||
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
||||
@@ -640,19 +638,6 @@ class FSMTDecoder(nn.Module):
|
||||
- hidden states
|
||||
- attentions
|
||||
"""
|
||||
if "decoder_cached_states" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_cached_states")
|
||||
if "decoder_past_key_values" in unused:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = unused.pop("decoder_past_key_values")
|
||||
|
||||
# check attention mask and invert
|
||||
if encoder_padding_mask is not None:
|
||||
encoder_padding_mask = invert_mask(encoder_padding_mask)
|
||||
@@ -933,15 +918,7 @@ class FSMTModel(PretrainedFSMTModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
if "decoder_past_key_values" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
||||
|
||||
if decoder_input_ids is None:
|
||||
use_cache = False
|
||||
|
||||
@@ -1071,7 +1048,6 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**unused,
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
"""PyTorch OpenAI GPT-2 model."""
|
||||
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
@@ -528,16 +527,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
if "past" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
||||
@@ -758,7 +748,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -766,13 +755,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
||||
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
||||
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
||||
"""
|
||||
if "past" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
@@ -900,8 +882,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
||||
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
||||
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
||||
`input_ids` above)
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -930,19 +910,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
||||
>>> mc_logits = outputs.mc_logits
|
||||
|
||||
"""
|
||||
if "lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("lm_labels")
|
||||
if "past" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from functools import lru_cache
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -293,13 +292,6 @@ class GPT2Tokenizer(PreTrainedTokenizer):
|
||||
return vocab_file, merge_file
|
||||
|
||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||
if is_split_into_words or add_prefix_space:
|
||||
text = " " + text
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from tokenizers import pre_tokenizers
|
||||
@@ -151,13 +150,6 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
|
||||
self.add_prefix_space = add_prefix_space
|
||||
|
||||
def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
is_split_into_words = kwargs.get("is_split_into_words", False)
|
||||
assert self.add_prefix_space or not is_split_into_words, (
|
||||
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
||||
@@ -167,14 +159,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
|
||||
return super()._batch_encode_plus(*args, **kwargs)
|
||||
|
||||
def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
else:
|
||||
is_split_into_words = kwargs.get("is_split_into_words", False)
|
||||
is_split_into_words = kwargs.get("is_split_into_words", False)
|
||||
|
||||
assert self.add_prefix_space or not is_split_into_words, (
|
||||
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
"""PyTorch Longformer model. """
|
||||
|
||||
import math
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -1509,7 +1508,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -1538,14 +1536,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
|
||||
>>> loss = outputs.loss
|
||||
>>> prediction_logits = outputs.logits
|
||||
"""
|
||||
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.longformer(
|
||||
|
||||
@@ -1109,7 +1109,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -1119,12 +1118,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.mobilebert(
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -645,7 +644,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
|
||||
@@ -659,8 +657,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
||||
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
||||
`input_ids` above)
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -683,13 +679,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
||||
>>> mc_logits = outputs.mc_logits
|
||||
"""
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
if "lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids,
|
||||
|
||||
@@ -302,7 +302,7 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
|
||||
**kwargs,
|
||||
) -> BatchEncoding:
|
||||
if max_length is None:
|
||||
max_length = self.max_len
|
||||
max_length = self.model_max_length
|
||||
model_inputs = self(
|
||||
src_texts,
|
||||
add_special_tokens=True,
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
"""PyTorch RoBERTa model. """
|
||||
|
||||
import math
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
@@ -872,7 +871,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||
@@ -882,13 +880,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
"""
|
||||
if "masked_lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("masked_lm_labels")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
outputs = self.roberta(
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
# limitations under the License.
|
||||
"""Tokenization classes for RoBERTa."""
|
||||
|
||||
import warnings
|
||||
from typing import List, Optional
|
||||
|
||||
from ...tokenization_utils import AddedToken
|
||||
@@ -251,13 +250,6 @@ class RobertaTokenizer(GPT2Tokenizer):
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
||||
text = " " + text
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
import copy
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
@@ -1048,7 +1047,6 @@ class T5Model(T5PreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
Returns:
|
||||
@@ -1066,20 +1064,6 @@ class T5Model(T5PreTrainedModel):
|
||||
|
||||
>>> last_hidden_states = outputs.last_hidden_state
|
||||
"""
|
||||
if "decoder_past_key_value_states" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_value_states")
|
||||
if "decoder_past_key_values" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
@@ -1198,15 +1182,12 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
**kwargs,
|
||||
):
|
||||
r"""
|
||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||
Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[-100, 0, ...,
|
||||
config.vocab_size - 1]`. All labels set to ``-100`` are ignored (masked), the loss is only computed for
|
||||
labels in ``[0, ..., config.vocab_size]``
|
||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||
Used to hide legacy arguments that have been deprecated.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -1226,27 +1207,6 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
|
||||
>>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you ", return_tensors="pt").input_ids # Batch size 1
|
||||
>>> outputs = model.generate(input_ids)
|
||||
"""
|
||||
|
||||
if "lm_labels" in kwargs:
|
||||
warnings.warn(
|
||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
labels = kwargs.pop("lm_labels")
|
||||
if "decoder_past_key_value_states" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_value_states")
|
||||
if "decoder_past_key_values" in kwargs:
|
||||
warnings.warn(
|
||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||
|
||||
|
||||
@@ -595,7 +595,6 @@ class TFT5MainLayer(tf.keras.layers.Layer):
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
training=False,
|
||||
**kwargs,
|
||||
) -> Tuple:
|
||||
if isinstance(inputs, (tuple, list)):
|
||||
input_ids = inputs[0]
|
||||
@@ -621,21 +620,8 @@ class TFT5MainLayer(tf.keras.layers.Layer):
|
||||
output_attentions = inputs.get("output_attentions", output_attentions)
|
||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||
assert len(inputs) <= 10, "Too many inputs."
|
||||
|
||||
if "past_key_values" in inputs:
|
||||
warnings.warn(
|
||||
"The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = inputs.pop("past_key_values")
|
||||
else:
|
||||
input_ids = inputs
|
||||
if "past_key_values" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past_key_values")
|
||||
|
||||
output_attentions = output_attentions if output_attentions is not None else self.output_attentions
|
||||
output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states
|
||||
@@ -1078,23 +1064,9 @@ class TFT5Model(TFT5PreTrainedModel):
|
||||
output_attentions = inputs.get("output_attentions", output_attentions)
|
||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||
assert len(inputs) <= 13, "Too many inputs."
|
||||
|
||||
if "past_key_value_states" in inputs:
|
||||
warnings.warn(
|
||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = inputs.pop("past_key_value_states")
|
||||
else:
|
||||
input_ids = inputs
|
||||
|
||||
if "past_key_value_states" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past_key_value_states")
|
||||
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
||||
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
||||
@@ -1294,23 +1266,9 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
|
||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||
return_dict = inputs.get("return_dict", return_dict)
|
||||
assert len(inputs) <= 14, "Too many inputs."
|
||||
|
||||
if "past_key_value_states" in inputs:
|
||||
warnings.warn(
|
||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = inputs.pop("past_key_value_states")
|
||||
else:
|
||||
input_ids = inputs
|
||||
|
||||
if "past_key_value_states" in kwargs:
|
||||
warnings.warn(
|
||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
past_key_values = kwargs.pop("past_key_value_states")
|
||||
|
||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
||||
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
||||
|
||||
@@ -15,9 +15,6 @@
|
||||
# limitations under the License.
|
||||
""" Transformer XL configuration """
|
||||
|
||||
|
||||
import warnings
|
||||
|
||||
from ...configuration_utils import PretrainedConfig
|
||||
from ...utils import logging
|
||||
|
||||
@@ -139,13 +136,6 @@ class TransfoXLConfig(PretrainedConfig):
|
||||
eos_token_id=0,
|
||||
**kwargs
|
||||
):
|
||||
if "tie_weight" in kwargs:
|
||||
warnings.warn(
|
||||
"The config parameter `tie_weight` is deprecated. Please use `tie_word_embeddings` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
kwargs["tie_word_embeddings"] = kwargs["tie_weight"]
|
||||
|
||||
super().__init__(eos_token_id=eos_token_id, **kwargs)
|
||||
self.vocab_size = vocab_size
|
||||
self.cutoffs = []
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
"""
|
||||
TF 2.0 Transformer XL model.
|
||||
"""
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
@@ -865,13 +864,6 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
|
||||
return self.crit.out_layers[-1]
|
||||
return None
|
||||
|
||||
def reset_length(self, tgt_len, ext_len, mem_len):
|
||||
warnings.warn(
|
||||
"The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
self.transformer.reset_memory_length(mem_len)
|
||||
|
||||
def reset_memory_length(self, mem_len):
|
||||
self.transformer.reset_memory_length(mem_len)
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular
|
||||
https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py
|
||||
"""
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
@@ -1010,13 +1009,6 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
|
||||
else:
|
||||
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
|
||||
|
||||
def reset_length(self, tgt_len, ext_len, mem_len):
|
||||
warnings.warn(
|
||||
"The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
self.transformer.reset_memory_length(mem_len)
|
||||
|
||||
def reset_memory_length(self, mem_len):
|
||||
self.transformer.reset_memory_length(mem_len)
|
||||
|
||||
|
||||
@@ -16,9 +16,7 @@
|
||||
TF 2.0 XLM model.
|
||||
"""
|
||||
|
||||
|
||||
import itertools
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -997,10 +995,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
||||
)
|
||||
|
||||
if lengths is not None:
|
||||
warnings.warn(
|
||||
logger.warn(
|
||||
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
||||
"attention mask instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
lengths = None
|
||||
|
||||
|
||||
@@ -16,10 +16,8 @@
|
||||
PyTorch XLM model.
|
||||
"""
|
||||
|
||||
|
||||
import itertools
|
||||
import math
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
|
||||
@@ -1228,10 +1226,9 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
|
||||
)
|
||||
|
||||
if lengths is not None:
|
||||
warnings.warn(
|
||||
logger.warn(
|
||||
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
||||
"attention mask instead.",
|
||||
FutureWarning,
|
||||
"attention mask instead."
|
||||
)
|
||||
lengths = None
|
||||
|
||||
|
||||
@@ -1182,7 +1182,6 @@ class FillMaskPipeline(Pipeline):
|
||||
device: int = -1,
|
||||
top_k=5,
|
||||
task: str = "",
|
||||
**kwargs
|
||||
):
|
||||
super().__init__(
|
||||
model=model,
|
||||
@@ -1196,15 +1195,7 @@ class FillMaskPipeline(Pipeline):
|
||||
)
|
||||
|
||||
self.check_model_type(TF_MODEL_WITH_LM_HEAD_MAPPING if self.framework == "tf" else MODEL_FOR_MASKED_LM_MAPPING)
|
||||
|
||||
if "topk" in kwargs:
|
||||
warnings.warn(
|
||||
"The `topk` argument is deprecated and will be removed in a future version, use `top_k` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
self.top_k = kwargs.pop("topk")
|
||||
else:
|
||||
self.top_k = top_k
|
||||
self.top_k = top_k
|
||||
|
||||
def ensure_exactly_one_mask_token(self, masked_index: np.ndarray):
|
||||
numel = np.prod(masked_index.shape)
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
import itertools
|
||||
import re
|
||||
import unicodedata
|
||||
import warnings
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, overload
|
||||
|
||||
from .file_utils import add_end_docstrings
|
||||
@@ -246,12 +245,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
||||
Returns:
|
||||
:obj:`List[str]`: The list of tokens.
|
||||
"""
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
kwargs["is_split_into_words"] = kwargs.pop("is_pretokenized")
|
||||
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
|
||||
all_special_tokens_extended = dict(
|
||||
(str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken)
|
||||
@@ -448,13 +441,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
||||
"https://github.com/huggingface/transformers/pull/2674"
|
||||
)
|
||||
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
first_ids = get_input_ids(text)
|
||||
second_ids = get_input_ids(text_pair) if text_pair is not None else None
|
||||
|
||||
@@ -530,13 +516,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
||||
"transformers.PreTrainedTokenizerFast."
|
||||
)
|
||||
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
input_ids = []
|
||||
for ids_or_pair_ids in batch_text_or_text_pairs:
|
||||
if not isinstance(ids_or_pair_ids, (list, tuple)):
|
||||
|
||||
@@ -1532,18 +1532,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
|
||||
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
def max_len(self) -> int:
|
||||
"""
|
||||
:obj:`int`: **Deprecated** Kept here for backward compatibility. Now renamed to :obj:`model_max_length` to
|
||||
avoid ambiguity.
|
||||
"""
|
||||
warnings.warn(
|
||||
"The `max_len` attribute has been deprecated and will be removed in a future version, use `model_max_length` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self.model_max_length
|
||||
|
||||
@property
|
||||
def max_len_single_sentence(self) -> int:
|
||||
"""
|
||||
@@ -2785,15 +2773,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
|
||||
and ``convert_tokens_to_ids`` methods.
|
||||
"""
|
||||
|
||||
if "return_lengths" in kwargs:
|
||||
if verbose:
|
||||
warnings.warn(
|
||||
"The PreTrainedTokenizerBase.prepare_for_model `return_lengths` parameter is deprecated. "
|
||||
"Please use `return_length` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
return_length = kwargs["return_lengths"]
|
||||
|
||||
# Backward compatibility for 'truncation_strategy', 'pad_to_max_length'
|
||||
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
|
||||
padding=padding,
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
@@ -357,7 +356,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
||||
return_offsets_mapping: bool = False,
|
||||
return_length: bool = False,
|
||||
verbose: bool = True,
|
||||
**kwargs
|
||||
) -> BatchEncoding:
|
||||
|
||||
if not isinstance(batch_text_or_text_pairs, list):
|
||||
@@ -365,16 +363,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
||||
"batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs))
|
||||
)
|
||||
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
if kwargs:
|
||||
raise ValueError(f"Keyword arguments {kwargs} not recognized.")
|
||||
|
||||
# Set the truncation and padding strategy and restore the initial configuration
|
||||
self.set_truncation_and_padding(
|
||||
padding_strategy=padding_strategy,
|
||||
@@ -453,12 +441,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
||||
verbose: bool = True,
|
||||
**kwargs
|
||||
) -> BatchEncoding:
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
batched_input = [(text, text_pair)] if text_pair else [text]
|
||||
batched_output = self._batch_encode_plus(
|
||||
|
||||
@@ -213,8 +213,6 @@ class Trainer:
|
||||
containing the optimizer and the scheduler to use. Will default to an instance of
|
||||
:class:`~transformers.AdamW` on your model and a scheduler given by
|
||||
:func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
|
||||
kwargs:
|
||||
Deprecated keyword arguments.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -229,7 +227,6 @@ class Trainer:
|
||||
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
|
||||
callbacks: Optional[List[TrainerCallback]] = None,
|
||||
optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
|
||||
**kwargs,
|
||||
):
|
||||
if args is None:
|
||||
logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.")
|
||||
@@ -262,27 +259,6 @@ class Trainer:
|
||||
self.callback_handler = CallbackHandler(callbacks, self.model, self.optimizer, self.lr_scheduler)
|
||||
self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)
|
||||
|
||||
# Deprecated arguments
|
||||
if "tb_writer" in kwargs:
|
||||
warnings.warn(
|
||||
"Passing `tb_writer` as a keyword argument is deprecated and won't be possible in a "
|
||||
+ "future version. Use `TensorBoardCallback(tb_writer=...)` instead and pass it to the `callbacks`"
|
||||
+ "argument",
|
||||
FutureWarning,
|
||||
)
|
||||
tb_writer = kwargs.pop("tb_writer")
|
||||
self.remove_callback(TensorBoardCallback)
|
||||
self.add_callback(TensorBoardCallback(tb_writer=tb_writer))
|
||||
if "prediction_loss_only" in kwargs:
|
||||
warnings.warn(
|
||||
"Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a "
|
||||
+ "future version. Use `args.prediction_loss_only` instead. Setting "
|
||||
+ f"`args.prediction_loss_only={kwargs['prediction_loss_only']}",
|
||||
FutureWarning,
|
||||
)
|
||||
self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
# Will be set to True by `self._setup_loggers()` on first call to `self.log()`.
|
||||
self._loggers_initialized = False
|
||||
|
||||
@@ -294,14 +270,7 @@ class Trainer:
|
||||
# We'll find a more elegant and not need to do this in the future.
|
||||
self.model.config.xla_device = True
|
||||
if not callable(self.data_collator) and callable(getattr(self.data_collator, "collate_batch", None)):
|
||||
self.data_collator = self.data_collator.collate_batch
|
||||
warnings.warn(
|
||||
(
|
||||
"The `data_collator` should now be a simple callable (function, class with `__call__`), classes "
|
||||
+ "with a `collate_batch` are deprecated and won't be supported in a future version."
|
||||
),
|
||||
FutureWarning,
|
||||
)
|
||||
raise ValueError("The `data_collator` should be a simple callable (function, class with `__call__`).")
|
||||
|
||||
if args.max_steps > 0:
|
||||
logger.info("max_steps is given, it will override any value given in num_train_epochs")
|
||||
@@ -1050,12 +1019,6 @@ class Trainer:
|
||||
logs (:obj:`Dict[str, float]`):
|
||||
The values to log.
|
||||
"""
|
||||
if hasattr(self, "_log"):
|
||||
warnings.warn(
|
||||
"The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._log(logs)
|
||||
if self.state.epoch is not None:
|
||||
logs["epoch"] = self.state.epoch
|
||||
|
||||
@@ -1095,12 +1058,6 @@ class Trainer:
|
||||
Return:
|
||||
:obj:`torch.Tensor`: The tensor with training loss on this batch.
|
||||
"""
|
||||
if hasattr(self, "_training_step"):
|
||||
warnings.warn(
|
||||
"The `_training_step` method is deprecated and won't be called in a future version, define `training_step` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._training_step(model, inputs, self.optimizer)
|
||||
|
||||
model.train()
|
||||
inputs = self._prepare_inputs(inputs)
|
||||
@@ -1140,18 +1097,6 @@ class Trainer:
|
||||
# We don't use .loss here since the model may return tuples instead of ModelOutput.
|
||||
return outputs[0]
|
||||
|
||||
def is_local_master(self) -> bool:
|
||||
"""
|
||||
Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several
|
||||
machines) main process.
|
||||
|
||||
.. warning::
|
||||
|
||||
This method is deprecated, use :meth:`~transformers.Trainer.is_local_process_zero` instead.
|
||||
"""
|
||||
warnings.warn("This method is deprecated, use `Trainer.is_local_process_zero()` instead.", FutureWarning)
|
||||
return self.is_local_process_zero()
|
||||
|
||||
def is_local_process_zero(self) -> bool:
|
||||
"""
|
||||
Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several
|
||||
@@ -1162,18 +1107,6 @@ class Trainer:
|
||||
else:
|
||||
return self.args.local_rank in [-1, 0]
|
||||
|
||||
def is_world_master(self) -> bool:
|
||||
"""
|
||||
Whether or not this process is the global main process (when training in a distributed fashion on several
|
||||
machines, this is only going to be :obj:`True` for one process).
|
||||
|
||||
.. warning::
|
||||
|
||||
This method is deprecated, use :meth:`~transformers.Trainer.is_world_process_zero` instead.
|
||||
"""
|
||||
warnings.warn("This method is deprecated, use `Trainer.is_world_process_zero()` instead.", FutureWarning)
|
||||
return self.is_world_process_zero()
|
||||
|
||||
def is_world_process_zero(self) -> bool:
|
||||
"""
|
||||
Whether or not this process is the global main process (when training in a distributed fashion on several
|
||||
@@ -1362,13 +1295,6 @@ class Trainer:
|
||||
|
||||
Works both with or without labels.
|
||||
"""
|
||||
if hasattr(self, "_prediction_loop"):
|
||||
warnings.warn(
|
||||
"The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)
|
||||
|
||||
if not isinstance(dataloader.dataset, collections.abc.Sized):
|
||||
raise ValueError("dataset must implement __len__")
|
||||
prediction_loss_only = (
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import datetime
|
||||
import math
|
||||
import os
|
||||
import warnings
|
||||
from typing import Callable, Dict, Optional, Tuple
|
||||
|
||||
|
||||
@@ -66,8 +65,6 @@ class TFTrainer:
|
||||
:class:`~transformers.AdamWeightDecay`. The scheduler will default to an instance of
|
||||
:class:`tf.keras.optimizers.schedules.PolynomialDecay` if :obj:`args.num_warmup_steps` is 0 else an
|
||||
instance of :class:`~transformers.WarmUp`.
|
||||
kwargs:
|
||||
Deprecated keyword arguments.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -82,7 +79,6 @@ class TFTrainer:
|
||||
None,
|
||||
None,
|
||||
),
|
||||
**kwargs,
|
||||
):
|
||||
assert parse(tf.__version__).release >= (2, 2, 0), (
|
||||
"You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
|
||||
@@ -98,13 +94,6 @@ class TFTrainer:
|
||||
self.gradient_accumulator = GradientAccumulator()
|
||||
self.global_step = 0
|
||||
self.epoch_logging = 0
|
||||
if "prediction_loss_only" in kwargs:
|
||||
warnings.warn(
|
||||
"Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a future version. Use `args.prediction_loss_only` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
|
||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
||||
|
||||
if tb_writer is not None:
|
||||
self.tb_writer = tb_writer
|
||||
@@ -249,12 +238,6 @@ class TFTrainer:
|
||||
WANDB_DISABLED:
|
||||
(Optional): boolean - defaults to false, set to "true" to disable wandb entirely.
|
||||
"""
|
||||
if hasattr(self, "_setup_wandb"):
|
||||
warnings.warn(
|
||||
"The `_setup_wandb` method is deprecated and won't be called in a future version, define `setup_wandb` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._setup_wandb()
|
||||
|
||||
logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"')
|
||||
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
||||
@@ -304,14 +287,6 @@ class TFTrainer:
|
||||
|
||||
Works both with or without labels.
|
||||
"""
|
||||
if hasattr(self, "_prediction_loop"):
|
||||
warnings.warn(
|
||||
"The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._prediction_loop(
|
||||
dataset, steps, num_examples, description, prediction_loss_only=prediction_loss_only
|
||||
)
|
||||
|
||||
prediction_loss_only = (
|
||||
prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
|
||||
@@ -393,12 +368,6 @@ class TFTrainer:
|
||||
logs (:obj:`Dict[str, float]`):
|
||||
The values to log.
|
||||
"""
|
||||
if hasattr(self, "_log"):
|
||||
warnings.warn(
|
||||
"The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._log(logs)
|
||||
logs["epoch"] = self.epoch_logging
|
||||
|
||||
if self.tb_writer:
|
||||
@@ -733,12 +702,6 @@ class TFTrainer:
|
||||
Returns:
|
||||
A tuple of two :obj:`tf.Tensor`: The loss and logits.
|
||||
"""
|
||||
if hasattr(self, "_run_model"):
|
||||
warnings.warn(
|
||||
"The `_run_model` method is deprecated and won't be called in a future version, define `run_model` in your subclass.",
|
||||
FutureWarning,
|
||||
)
|
||||
return self._run_model(features, labels, training)
|
||||
|
||||
if self.args.past_index >= 0 and getattr(self, "_past", None) is not None:
|
||||
features["mems"] = self._past
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
@@ -198,10 +197,6 @@ class TrainingArguments:
|
||||
do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
|
||||
do_eval: bool = field(default=None, metadata={"help": "Whether to run eval on the dev set."})
|
||||
do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
|
||||
evaluate_during_training: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Run evaluation during training at each logging step."},
|
||||
)
|
||||
evaluation_strategy: EvaluationStrategy = field(
|
||||
default="no",
|
||||
metadata={"help": "Run evaluation during training at each logging step."},
|
||||
@@ -340,12 +335,6 @@ class TrainingArguments:
|
||||
def __post_init__(self):
|
||||
if self.disable_tqdm is None:
|
||||
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
|
||||
if self.evaluate_during_training is True:
|
||||
self.evaluation_strategy = EvaluationStrategy.STEPS
|
||||
warnings.warn(
|
||||
"The `evaluate_during_training` argument is deprecated in favor of `evaluation_strategy` (which has more options)",
|
||||
FutureWarning,
|
||||
)
|
||||
self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy)
|
||||
if self.do_eval is False and self.evaluation_strategy != EvaluationStrategy.NO:
|
||||
self.do_eval = True
|
||||
|
||||
@@ -73,7 +73,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||
import warnings
|
||||
from typing import List, Optional
|
||||
|
||||
from tokenizers import ByteLevelBPETokenizer
|
||||
@@ -234,13 +233,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
|
||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||
|
||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
||||
text = " " + text
|
||||
@@ -285,29 +277,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
||||
)
|
||||
self.add_prefix_space = add_prefix_space
|
||||
|
||||
def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||
is_split_into_words = None
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
||||
|
||||
is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words
|
||||
return super()._batch_encode_plus(*args, **kwargs)
|
||||
|
||||
def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||
is_split_into_words = None
|
||||
if "is_pretokenized" in kwargs:
|
||||
warnings.warn(
|
||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
||||
FutureWarning,
|
||||
)
|
||||
is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words
|
||||
return super()._encode_plus(*args, **kwargs)
|
||||
|
||||
|
||||
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
||||
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
||||
if token_ids_1 is None:
|
||||
|
||||
@@ -213,7 +213,9 @@ class GPT2ModelTester:
|
||||
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[
|
||||
"last_hidden_state"
|
||||
]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||
@@ -255,7 +257,7 @@ class GPT2ModelTester:
|
||||
|
||||
# get two different outputs
|
||||
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, past=past, attention_mask=attn_mask)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, past_key_values=past, attention_mask=attn_mask)["last_hidden_state"]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||
@@ -286,7 +288,9 @@ class GPT2ModelTester:
|
||||
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
||||
|
||||
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
|
||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[
|
||||
"last_hidden_state"
|
||||
]
|
||||
self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1])
|
||||
|
||||
# select random slice
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from transformers import pipeline
|
||||
from transformers.testing_utils import require_tf, require_torch, slow
|
||||
|
||||
@@ -53,13 +51,6 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
|
||||
]
|
||||
expected_check_keys = ["sequence"]
|
||||
|
||||
@require_torch
|
||||
def test_torch_topk_deprecation(self):
|
||||
# At pipeline initialization only it was not enabled at pipeline
|
||||
# call site before
|
||||
with pytest.warns(FutureWarning, match=r".*use `top_k`.*"):
|
||||
pipeline(task="fill-mask", model=self.small_models[0], topk=1)
|
||||
|
||||
@require_torch
|
||||
def test_torch_fill_mask(self):
|
||||
valid_inputs = "My name is <mask>"
|
||||
|
||||
@@ -83,7 +83,7 @@ class AutoTokenizerTest(unittest.TestCase):
|
||||
else:
|
||||
self.assertEqual(tokenizer.do_lower_case, False)
|
||||
|
||||
self.assertEqual(tokenizer.max_len, 512)
|
||||
self.assertEqual(tokenizer.model_max_length, 512)
|
||||
|
||||
@require_tokenizers
|
||||
def test_tokenizer_identifier_non_existent(self):
|
||||
|
||||
Reference in New Issue
Block a user