Remove deprecated (#8604)
* Remove old deprecated arguments

Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>

* Remove needless imports

* Fix tests

Co-authored-by: LysandreJik <lysandre.debut@reseau.eseo.fr>
This commit is contained in:
@@ -138,7 +138,7 @@ class TestFinetuneTrainer(TestCasePlus):
|
|||||||
per_device_train_batch_size=batch_size,
|
per_device_train_batch_size=batch_size,
|
||||||
per_device_eval_batch_size=batch_size,
|
per_device_eval_batch_size=batch_size,
|
||||||
predict_with_generate=True,
|
predict_with_generate=True,
|
||||||
evaluate_during_training=True,
|
evaluation_strategy="steps",
|
||||||
do_train=True,
|
do_train=True,
|
||||||
do_eval=True,
|
do_eval=True,
|
||||||
warmup_steps=0,
|
warmup_steps=0,
|
||||||
@@ -179,7 +179,7 @@ class TestFinetuneTrainer(TestCasePlus):
|
|||||||
--per_device_eval_batch_size 4
|
--per_device_eval_batch_size 4
|
||||||
--learning_rate 3e-3
|
--learning_rate 3e-3
|
||||||
--warmup_steps 8
|
--warmup_steps 8
|
||||||
--evaluate_during_training
|
--evaluation_strategy steps
|
||||||
--predict_with_generate
|
--predict_with_generate
|
||||||
--logging_steps 0
|
--logging_steps 0
|
||||||
--save_steps {str(eval_steps)}
|
--save_steps {str(eval_steps)}
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ def main():
|
|||||||
trainer.save_model()
|
trainer.save_model()
|
||||||
# For convenience, we also re-save the tokenizer to the same directory,
|
# For convenience, we also re-save the tokenizer to the same directory,
|
||||||
# so that you can share your model easily on huggingface.co/models =)
|
# so that you can share your model easily on huggingface.co/models =)
|
||||||
if trainer.is_world_master():
|
if trainer.is_world_process_zero():
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
@@ -265,7 +265,7 @@ def main():
|
|||||||
result = trainer.evaluate()
|
result = trainer.evaluate()
|
||||||
|
|
||||||
output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
|
output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
|
||||||
if trainer.is_world_master():
|
if trainer.is_world_process_zero():
|
||||||
with open(output_eval_file, "w") as writer:
|
with open(output_eval_file, "w") as writer:
|
||||||
logger.info("***** Eval results *****")
|
logger.info("***** Eval results *****")
|
||||||
for key, value in result.items():
|
for key, value in result.items():
|
||||||
|
|||||||
@@ -145,11 +145,11 @@ def squad_convert_example_to_features(
|
|||||||
# in the way they compute mask of added tokens.
|
# in the way they compute mask of added tokens.
|
||||||
tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower()
|
tokenizer_type = type(tokenizer).__name__.replace("Tokenizer", "").lower()
|
||||||
sequence_added_tokens = (
|
sequence_added_tokens = (
|
||||||
tokenizer.max_len - tokenizer.max_len_single_sentence + 1
|
tokenizer.model_max_length - tokenizer.max_len_single_sentence + 1
|
||||||
if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET
|
if tokenizer_type in MULTI_SEP_TOKENS_TOKENIZERS_SET
|
||||||
else tokenizer.max_len - tokenizer.max_len_single_sentence
|
else tokenizer.model_max_length - tokenizer.max_len_single_sentence
|
||||||
)
|
)
|
||||||
sequence_pair_added_tokens = tokenizer.max_len - tokenizer.max_len_sentences_pair
|
sequence_pair_added_tokens = tokenizer.model_max_length - tokenizer.max_len_sentences_pair
|
||||||
|
|
||||||
span_doc_tokens = all_doc_tokens
|
span_doc_tokens = all_doc_tokens
|
||||||
while len(spans) * doc_stride < len(all_doc_tokens):
|
while len(spans) * doc_stride < len(all_doc_tokens):
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -742,7 +741,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`):
|
labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`):
|
||||||
@@ -753,8 +751,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
|||||||
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
|
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
|
||||||
(see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence
|
(see :obj:`input_ids` docstring) Indices should be in ``[0, 1]``. ``0`` indicates original order (sequence
|
||||||
A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A).
|
A, then sequence B), ``1`` indicates switched order (sequence B, then sequence A).
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
@@ -773,14 +769,6 @@ class AlbertForPreTraining(AlbertPreTrainedModel):
|
|||||||
>>> sop_logits = outputs.sop_logits
|
>>> sop_logits = outputs.sop_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.albert(
|
outputs = self.albert(
|
||||||
@@ -898,23 +886,13 @@ class AlbertForMaskedLM(AlbertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.albert(
|
outputs = self.albert(
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
"""PyTorch BART model, ported from the fairseq repo."""
|
"""PyTorch BART model, ported from the fairseq repo."""
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import warnings
|
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -529,7 +528,6 @@ class BartDecoder(nn.Module):
|
|||||||
output_attentions=False,
|
output_attentions=False,
|
||||||
output_hidden_states=False,
|
output_hidden_states=False,
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
**unused,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
||||||
@@ -551,18 +549,6 @@ class BartDecoder(nn.Module):
|
|||||||
- hidden states
|
- hidden states
|
||||||
- attentions
|
- attentions
|
||||||
"""
|
"""
|
||||||
if "decoder_cached_states" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_cached_states")
|
|
||||||
if "decoder_past_key_values" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_past_key_values")
|
|
||||||
|
|
||||||
# check attention mask and invert
|
# check attention mask and invert
|
||||||
if encoder_padding_mask is not None:
|
if encoder_padding_mask is not None:
|
||||||
@@ -873,14 +859,7 @@ class BartModel(PretrainedBartModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
if "decoder_past_key_values" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
|
||||||
|
|
||||||
if decoder_input_ids is None:
|
if decoder_input_ids is None:
|
||||||
use_cache = False
|
use_cache = False
|
||||||
@@ -1006,7 +985,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**unused,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -1034,24 +1012,6 @@ class BartForConditionalGeneration(PretrainedBartModel):
|
|||||||
>>> tokenizer.decode(predictions).split()
|
>>> tokenizer.decode(predictions).split()
|
||||||
>>> # ['good', 'great', 'all', 'really', 'very']
|
>>> # ['good', 'great', 'all', 'really', 'very']
|
||||||
"""
|
"""
|
||||||
if "lm_labels" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = unused.pop("lm_labels")
|
|
||||||
if "decoder_cached_states" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_cached_states")
|
|
||||||
if "decoder_past_key_values" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_past_key_values")
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
if labels is not None:
|
if labels is not None:
|
||||||
|
|||||||
@@ -896,7 +896,6 @@ class BertForPreTraining(BertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`):
|
labels (:obj:`torch.LongTensor` of shape ``(batch_size, sequence_length)``, `optional`):
|
||||||
@@ -928,13 +927,6 @@ class BertForPreTraining(BertPreTrainedModel):
|
|||||||
>>> prediction_logits = outputs.prediction_logits
|
>>> prediction_logits = outputs.prediction_logits
|
||||||
>>> seq_relationship_logits = outputs.seq_relationship_logits
|
>>> seq_relationship_logits = outputs.seq_relationship_logits
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.bert(
|
outputs = self.bert(
|
||||||
@@ -1136,24 +1128,13 @@ class BertForMaskedLM(BertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert "lm_labels" not in kwargs, "Use `BertWithLMHead` for autoregressive language modeling task."
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
|
|||||||
@@ -15,9 +15,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
""" PyTorch CTRL model."""
|
""" PyTorch CTRL model."""
|
||||||
|
|
||||||
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
@@ -369,15 +366,7 @@ class CTRLModel(CTRLPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
if "past" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||||
@@ -542,7 +531,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -550,13 +538,6 @@ class CTRLLMHeadModel(CTRLPreTrainedModel):
|
|||||||
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
||||||
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
||||||
"""
|
"""
|
||||||
if "past" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
transformer_outputs = self.transformer(
|
transformer_outputs = self.transformer(
|
||||||
|
|||||||
@@ -20,7 +20,6 @@
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
import math
|
import math
|
||||||
import warnings
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
@@ -526,23 +525,13 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.
|
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
dlbrt_output = self.distilbert(
|
dlbrt_output = self.distilbert(
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -1000,23 +999,13 @@ class ElectraForMaskedLM(ElectraPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
|
||||||
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
|
||||||
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
(masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
generator_hidden_states = self.electra(
|
generator_hidden_states = self.electra(
|
||||||
|
|||||||
@@ -29,7 +29,6 @@
|
|||||||
|
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import warnings
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@@ -618,7 +617,6 @@ class FSMTDecoder(nn.Module):
|
|||||||
output_attentions=False,
|
output_attentions=False,
|
||||||
output_hidden_states=False,
|
output_hidden_states=False,
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
**unused,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
Includes several features from "Jointly Learning to Align and Translate with Transformer Models" (Garg et al.,
|
||||||
@@ -640,19 +638,6 @@ class FSMTDecoder(nn.Module):
|
|||||||
- hidden states
|
- hidden states
|
||||||
- attentions
|
- attentions
|
||||||
"""
|
"""
|
||||||
if "decoder_cached_states" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_cached_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_cached_states")
|
|
||||||
if "decoder_past_key_values" in unused:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = unused.pop("decoder_past_key_values")
|
|
||||||
|
|
||||||
# check attention mask and invert
|
# check attention mask and invert
|
||||||
if encoder_padding_mask is not None:
|
if encoder_padding_mask is not None:
|
||||||
encoder_padding_mask = invert_mask(encoder_padding_mask)
|
encoder_padding_mask = invert_mask(encoder_padding_mask)
|
||||||
@@ -933,15 +918,7 @@ class FSMTModel(PretrainedFSMTModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
if "decoder_past_key_values" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
|
||||||
|
|
||||||
if decoder_input_ids is None:
|
if decoder_input_ids is None:
|
||||||
use_cache = False
|
use_cache = False
|
||||||
|
|
||||||
@@ -1071,7 +1048,6 @@ class FSMTForConditionalGeneration(PretrainedFSMTModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**unused,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
"""PyTorch OpenAI GPT-2 model."""
|
"""PyTorch OpenAI GPT-2 model."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
@@ -528,16 +527,7 @@ class GPT2Model(GPT2PreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
if "past" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||||
output_hidden_states = (
|
output_hidden_states = (
|
||||||
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
||||||
@@ -758,7 +748,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -766,13 +755,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
|||||||
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
``labels = input_ids`` Indices are selected in ``[-100, 0, ..., config.vocab_size]`` All labels set to
|
||||||
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
``-100`` are ignored (masked), the loss is only computed for labels in ``[0, ..., config.vocab_size]``
|
||||||
"""
|
"""
|
||||||
if "past" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
transformer_outputs = self.transformer(
|
transformer_outputs = self.transformer(
|
||||||
@@ -900,8 +882,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
|||||||
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
||||||
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
||||||
`input_ids` above)
|
`input_ids` above)
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
|
|
||||||
@@ -930,19 +910,6 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
|||||||
>>> mc_logits = outputs.mc_logits
|
>>> mc_logits = outputs.mc_logits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if "lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("lm_labels")
|
|
||||||
if "past" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
transformer_outputs = self.transformer(
|
transformer_outputs = self.transformer(
|
||||||
|
|||||||
@@ -17,7 +17,6 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -293,13 +292,6 @@ class GPT2Tokenizer(PreTrainedTokenizer):
|
|||||||
return vocab_file, merge_file
|
return vocab_file, merge_file
|
||||||
|
|
||||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||||
if is_split_into_words or add_prefix_space:
|
if is_split_into_words or add_prefix_space:
|
||||||
text = " " + text
|
text = " " + text
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import warnings
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from tokenizers import pre_tokenizers
|
from tokenizers import pre_tokenizers
|
||||||
@@ -151,13 +150,6 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
self.add_prefix_space = add_prefix_space
|
self.add_prefix_space = add_prefix_space
|
||||||
|
|
||||||
def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
is_split_into_words = kwargs.get("is_split_into_words", False)
|
is_split_into_words = kwargs.get("is_split_into_words", False)
|
||||||
assert self.add_prefix_space or not is_split_into_words, (
|
assert self.add_prefix_space or not is_split_into_words, (
|
||||||
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
||||||
@@ -167,14 +159,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
|
|||||||
return super()._batch_encode_plus(*args, **kwargs)
|
return super()._batch_encode_plus(*args, **kwargs)
|
||||||
|
|
||||||
def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
||||||
if "is_pretokenized" in kwargs:
|
is_split_into_words = kwargs.get("is_split_into_words", False)
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
else:
|
|
||||||
is_split_into_words = kwargs.get("is_split_into_words", False)
|
|
||||||
|
|
||||||
assert self.add_prefix_space or not is_split_into_words, (
|
assert self.add_prefix_space or not is_split_into_words, (
|
||||||
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True "
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
"""PyTorch Longformer model. """
|
"""PyTorch Longformer model. """
|
||||||
|
|
||||||
import math
|
import math
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -1509,7 +1508,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -1538,14 +1536,6 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
|
|||||||
>>> loss = outputs.loss
|
>>> loss = outputs.loss
|
||||||
>>> prediction_logits = output.logits
|
>>> prediction_logits = output.logits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.longformer(
|
outputs = self.longformer(
|
||||||
|
|||||||
@@ -1109,7 +1109,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -1119,12 +1118,6 @@ class MobileBertForMaskedLM(MobileBertPreTrainedModel):
|
|||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||||
Used to hide legacy arguments that have been deprecated.
|
Used to hide legacy arguments that have been deprecated.
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.mobilebert(
|
outputs = self.mobilebert(
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -645,7 +644,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
|
mc_token_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
|
||||||
@@ -659,8 +657,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
Labels for computing the multiple choice classification loss. Indices should be in ``[0, ...,
|
||||||
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
num_choices]`` where `num_choices` is the size of the second dimension of the input tensors. (see
|
||||||
`input_ids` above)
|
`input_ids` above)
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
|
|
||||||
@@ -683,13 +679,6 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
>>> mc_logits = outputs.mc_logits
|
>>> mc_logits = outputs.mc_logits
|
||||||
"""
|
"""
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
if "lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
transformer_outputs = self.transformer(
|
transformer_outputs = self.transformer(
|
||||||
input_ids,
|
input_ids,
|
||||||
|
|||||||
@@ -302,7 +302,7 @@ class ProphetNetTokenizer(PreTrainedTokenizer):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = self.max_len
|
max_length = self.model_max_length
|
||||||
model_inputs = self(
|
model_inputs = self(
|
||||||
src_texts,
|
src_texts,
|
||||||
add_special_tokens=True,
|
add_special_tokens=True,
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
"""PyTorch RoBERTa model. """
|
"""PyTorch RoBERTa model. """
|
||||||
|
|
||||||
import math
|
import math
|
||||||
import warnings
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
@@ -872,7 +871,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
|
||||||
@@ -882,13 +880,6 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
|
|||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
||||||
Used to hide legacy arguments that have been deprecated.
|
Used to hide legacy arguments that have been deprecated.
|
||||||
"""
|
"""
|
||||||
if "masked_lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `masked_lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("masked_lm_labels")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
outputs = self.roberta(
|
outputs = self.roberta(
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Tokenization classes for RoBERTa."""
|
"""Tokenization classes for RoBERTa."""
|
||||||
|
|
||||||
import warnings
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from ...tokenization_utils import AddedToken
|
from ...tokenization_utils import AddedToken
|
||||||
@@ -251,13 +250,6 @@ class RobertaTokenizer(GPT2Tokenizer):
|
|||||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||||
|
|
||||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||||
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
||||||
text = " " + text
|
text = " " + text
|
||||||
|
|||||||
@@ -18,7 +18,6 @@
|
|||||||
import copy
|
import copy
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
@@ -1048,7 +1047,6 @@ class T5Model(T5PreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
Returns:
|
Returns:
|
||||||
@@ -1066,20 +1064,6 @@ class T5Model(T5PreTrainedModel):
|
|||||||
|
|
||||||
>>> last_hidden_states = outputs.last_hidden_state
|
>>> last_hidden_states = outputs.last_hidden_state
|
||||||
"""
|
"""
|
||||||
if "decoder_past_key_value_states" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_value_states")
|
|
||||||
if "decoder_past_key_values" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
@@ -1198,15 +1182,12 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
return_dict=None,
|
return_dict=None,
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
r"""
|
r"""
|
||||||
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
|
||||||
Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[-100, 0, ...,
|
Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[-100, 0, ...,
|
||||||
config.vocab_size - 1]`. All labels set to ``-100`` are ignored (masked), the loss is only computed for
|
config.vocab_size - 1]`. All labels set to ``-100`` are ignored (masked), the loss is only computed for
|
||||||
labels in ``[0, ..., config.vocab_size]``
|
labels in ``[0, ..., config.vocab_size]``
|
||||||
kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`):
|
|
||||||
Used to hide legacy arguments that have been deprecated.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
@@ -1226,27 +1207,6 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
|
|||||||
>>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you ", return_tensors="pt").input_ids # Batch size 1
|
>>> input_ids = tokenizer("summarize: studies have shown that owning a dog is good for you ", return_tensors="pt").input_ids # Batch size 1
|
||||||
>>> outputs = model.generate(input_ids)
|
>>> outputs = model.generate(input_ids)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if "lm_labels" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `lm_labels` argument is deprecated and will be removed in a future version, use `labels` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
labels = kwargs.pop("lm_labels")
|
|
||||||
if "decoder_past_key_value_states" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_value_states")
|
|
||||||
if "decoder_past_key_values" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `decoder_past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("decoder_past_key_values")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||||
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
||||||
|
|
||||||
|
|||||||
@@ -595,7 +595,6 @@ class TFT5MainLayer(tf.keras.layers.Layer):
|
|||||||
output_attentions=None,
|
output_attentions=None,
|
||||||
output_hidden_states=None,
|
output_hidden_states=None,
|
||||||
training=False,
|
training=False,
|
||||||
**kwargs,
|
|
||||||
) -> Tuple:
|
) -> Tuple:
|
||||||
if isinstance(inputs, (tuple, list)):
|
if isinstance(inputs, (tuple, list)):
|
||||||
input_ids = inputs[0]
|
input_ids = inputs[0]
|
||||||
@@ -621,21 +620,8 @@ class TFT5MainLayer(tf.keras.layers.Layer):
|
|||||||
output_attentions = inputs.get("output_attentions", output_attentions)
|
output_attentions = inputs.get("output_attentions", output_attentions)
|
||||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||||
assert len(inputs) <= 10, "Too many inputs."
|
assert len(inputs) <= 10, "Too many inputs."
|
||||||
|
|
||||||
if "past_key_values" in inputs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = inputs.pop("past_key_values")
|
|
||||||
else:
|
else:
|
||||||
input_ids = inputs
|
input_ids = inputs
|
||||||
if "past_key_values" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_values` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past_key_values")
|
|
||||||
|
|
||||||
output_attentions = output_attentions if output_attentions is not None else self.output_attentions
|
output_attentions = output_attentions if output_attentions is not None else self.output_attentions
|
||||||
output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states
|
output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states
|
||||||
@@ -1078,23 +1064,9 @@ class TFT5Model(TFT5PreTrainedModel):
|
|||||||
output_attentions = inputs.get("output_attentions", output_attentions)
|
output_attentions = inputs.get("output_attentions", output_attentions)
|
||||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||||
assert len(inputs) <= 13, "Too many inputs."
|
assert len(inputs) <= 13, "Too many inputs."
|
||||||
|
|
||||||
if "past_key_value_states" in inputs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = inputs.pop("past_key_value_states")
|
|
||||||
else:
|
else:
|
||||||
input_ids = inputs
|
input_ids = inputs
|
||||||
|
|
||||||
if "past_key_value_states" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past_key_value_states")
|
|
||||||
|
|
||||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||||
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
||||||
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
||||||
@@ -1294,23 +1266,9 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling
|
|||||||
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
|
||||||
return_dict = inputs.get("return_dict", return_dict)
|
return_dict = inputs.get("return_dict", return_dict)
|
||||||
assert len(inputs) <= 14, "Too many inputs."
|
assert len(inputs) <= 14, "Too many inputs."
|
||||||
|
|
||||||
if "past_key_value_states" in inputs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = inputs.pop("past_key_value_states")
|
|
||||||
else:
|
else:
|
||||||
input_ids = inputs
|
input_ids = inputs
|
||||||
|
|
||||||
if "past_key_value_states" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `past_key_value_states` argument is deprecated and will be removed in a future version, use `past_key_values` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
past_key_values = kwargs.pop("past_key_value_states")
|
|
||||||
|
|
||||||
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
use_cache = use_cache if use_cache is not None else self.config.use_cache
|
||||||
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
output_attentions = output_attentions if output_attentions else self.config.output_attentions
|
||||||
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
output_hidden_states = output_hidden_states if output_hidden_states else self.config.output_hidden_states
|
||||||
|
|||||||
@@ -15,9 +15,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
""" Transformer XL configuration """
|
""" Transformer XL configuration """
|
||||||
|
|
||||||
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
from ...configuration_utils import PretrainedConfig
|
from ...configuration_utils import PretrainedConfig
|
||||||
from ...utils import logging
|
from ...utils import logging
|
||||||
|
|
||||||
@@ -139,13 +136,6 @@ class TransfoXLConfig(PretrainedConfig):
|
|||||||
eos_token_id=0,
|
eos_token_id=0,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
if "tie_weight" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The config parameter `tie_weight` is deprecated. Please use `tie_word_embeddings` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
kwargs["tie_word_embeddings"] = kwargs["tie_weight"]
|
|
||||||
|
|
||||||
super().__init__(eos_token_id=eos_token_id, **kwargs)
|
super().__init__(eos_token_id=eos_token_id, **kwargs)
|
||||||
self.vocab_size = vocab_size
|
self.vocab_size = vocab_size
|
||||||
self.cutoffs = []
|
self.cutoffs = []
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
"""
|
"""
|
||||||
TF 2.0 Transformer XL model.
|
TF 2.0 Transformer XL model.
|
||||||
"""
|
"""
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
@@ -865,13 +864,6 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
|
|||||||
return self.crit.out_layers[-1]
|
return self.crit.out_layers[-1]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def reset_length(self, tgt_len, ext_len, mem_len):
|
|
||||||
warnings.warn(
|
|
||||||
"The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.transformer.reset_memory_length(mem_len)
|
|
||||||
|
|
||||||
def reset_memory_length(self, mem_len):
|
def reset_memory_length(self, mem_len):
|
||||||
self.transformer.reset_memory_length(mem_len)
|
self.transformer.reset_memory_length(mem_len)
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,6 @@
|
|||||||
PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular
|
PyTorch Transformer XL model. Adapted from https://github.com/kimiyoung/transformer-xl. In particular
|
||||||
https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py
|
https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py
|
||||||
"""
|
"""
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
@@ -1010,13 +1009,6 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
|
|||||||
else:
|
else:
|
||||||
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
|
self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
|
||||||
|
|
||||||
def reset_length(self, tgt_len, ext_len, mem_len):
|
|
||||||
warnings.warn(
|
|
||||||
"The method `reset_length` is deprecated and will be removed in a future version, use `reset_memory_length` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.transformer.reset_memory_length(mem_len)
|
|
||||||
|
|
||||||
def reset_memory_length(self, mem_len):
|
def reset_memory_length(self, mem_len):
|
||||||
self.transformer.reset_memory_length(mem_len)
|
self.transformer.reset_memory_length(mem_len)
|
||||||
|
|
||||||
|
|||||||
@@ -16,9 +16,7 @@
|
|||||||
TF 2.0 XLM model.
|
TF 2.0 XLM model.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -997,10 +995,9 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if lengths is not None:
|
if lengths is not None:
|
||||||
warnings.warn(
|
logger.warn(
|
||||||
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
||||||
"attention mask instead.",
|
"attention mask instead.",
|
||||||
FutureWarning,
|
|
||||||
)
|
)
|
||||||
lengths = None
|
lengths = None
|
||||||
|
|
||||||
|
|||||||
@@ -16,10 +16,8 @@
|
|||||||
PyTorch XLM model.
|
PyTorch XLM model.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import math
|
import math
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
@@ -1228,10 +1226,9 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if lengths is not None:
|
if lengths is not None:
|
||||||
warnings.warn(
|
logger.warn(
|
||||||
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
"The `lengths` parameter cannot be used with the XLM multiple choice models. Please use the "
|
||||||
"attention mask instead.",
|
"attention mask instead."
|
||||||
FutureWarning,
|
|
||||||
)
|
)
|
||||||
lengths = None
|
lengths = None
|
||||||
|
|
||||||
|
|||||||
@@ -1182,7 +1182,6 @@ class FillMaskPipeline(Pipeline):
|
|||||||
device: int = -1,
|
device: int = -1,
|
||||||
top_k=5,
|
top_k=5,
|
||||||
task: str = "",
|
task: str = "",
|
||||||
**kwargs
|
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
model=model,
|
model=model,
|
||||||
@@ -1196,15 +1195,7 @@ class FillMaskPipeline(Pipeline):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.check_model_type(TF_MODEL_WITH_LM_HEAD_MAPPING if self.framework == "tf" else MODEL_FOR_MASKED_LM_MAPPING)
|
self.check_model_type(TF_MODEL_WITH_LM_HEAD_MAPPING if self.framework == "tf" else MODEL_FOR_MASKED_LM_MAPPING)
|
||||||
|
self.top_k = top_k
|
||||||
if "topk" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"The `topk` argument is deprecated and will be removed in a future version, use `top_k` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.top_k = kwargs.pop("topk")
|
|
||||||
else:
|
|
||||||
self.top_k = top_k
|
|
||||||
|
|
||||||
def ensure_exactly_one_mask_token(self, masked_index: np.ndarray):
|
def ensure_exactly_one_mask_token(self, masked_index: np.ndarray):
|
||||||
numel = np.prod(masked_index.shape)
|
numel = np.prod(masked_index.shape)
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import warnings
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union, overload
|
from typing import Any, Dict, List, Optional, Tuple, Union, overload
|
||||||
|
|
||||||
from .file_utils import add_end_docstrings
|
from .file_utils import add_end_docstrings
|
||||||
@@ -246,12 +245,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
|||||||
Returns:
|
Returns:
|
||||||
:obj:`List[str]`: The list of tokens.
|
:obj:`List[str]`: The list of tokens.
|
||||||
"""
|
"""
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
kwargs["is_split_into_words"] = kwargs.pop("is_pretokenized")
|
|
||||||
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
|
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
|
||||||
all_special_tokens_extended = dict(
|
all_special_tokens_extended = dict(
|
||||||
(str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken)
|
(str(t), t) for t in self.all_special_tokens_extended if isinstance(t, AddedToken)
|
||||||
@@ -448,13 +441,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
|||||||
"https://github.com/huggingface/transformers/pull/2674"
|
"https://github.com/huggingface/transformers/pull/2674"
|
||||||
)
|
)
|
||||||
|
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
first_ids = get_input_ids(text)
|
first_ids = get_input_ids(text)
|
||||||
second_ids = get_input_ids(text_pair) if text_pair is not None else None
|
second_ids = get_input_ids(text_pair) if text_pair is not None else None
|
||||||
|
|
||||||
@@ -530,13 +516,6 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
|||||||
"transformers.PreTrainedTokenizerFast."
|
"transformers.PreTrainedTokenizerFast."
|
||||||
)
|
)
|
||||||
|
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
input_ids = []
|
input_ids = []
|
||||||
for ids_or_pair_ids in batch_text_or_text_pairs:
|
for ids_or_pair_ids in batch_text_or_text_pairs:
|
||||||
if not isinstance(ids_or_pair_ids, (list, tuple)):
|
if not isinstance(ids_or_pair_ids, (list, tuple)):
|
||||||
|
|||||||
@@ -1532,18 +1532,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
|
|||||||
|
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
@property
|
|
||||||
def max_len(self) -> int:
|
|
||||||
"""
|
|
||||||
:obj:`int`: **Deprecated** Kept here for backward compatibility. Now renamed to :obj:`model_max_length` to
|
|
||||||
avoid ambiguity.
|
|
||||||
"""
|
|
||||||
warnings.warn(
|
|
||||||
"The `max_len` attribute has been deprecated and will be removed in a future version, use `model_max_length` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self.model_max_length
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def max_len_single_sentence(self) -> int:
|
def max_len_single_sentence(self) -> int:
|
||||||
"""
|
"""
|
||||||
@@ -2785,15 +2773,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
|
|||||||
and ``convert_tokens_to_ids`` methods.
|
and ``convert_tokens_to_ids`` methods.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if "return_lengths" in kwargs:
|
|
||||||
if verbose:
|
|
||||||
warnings.warn(
|
|
||||||
"The PreTrainedTokenizerBase.prepare_for_model `return_lengths` parameter is deprecated. "
|
|
||||||
"Please use `return_length` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return_length = kwargs["return_lengths"]
|
|
||||||
|
|
||||||
# Backward compatibility for 'truncation_strategy', 'pad_to_max_length'
|
# Backward compatibility for 'truncation_strategy', 'pad_to_max_length'
|
||||||
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
|
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
|
||||||
padding=padding,
|
padding=padding,
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
@@ -357,7 +356,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
|||||||
return_offsets_mapping: bool = False,
|
return_offsets_mapping: bool = False,
|
||||||
return_length: bool = False,
|
return_length: bool = False,
|
||||||
verbose: bool = True,
|
verbose: bool = True,
|
||||||
**kwargs
|
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
|
|
||||||
if not isinstance(batch_text_or_text_pairs, list):
|
if not isinstance(batch_text_or_text_pairs, list):
|
||||||
@@ -365,16 +363,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
|||||||
"batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs))
|
"batch_text_or_text_pairs has to be a list (got {})".format(type(batch_text_or_text_pairs))
|
||||||
)
|
)
|
||||||
|
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
if kwargs:
|
|
||||||
raise ValueError(f"Keyword arguments {kwargs} not recognized.")
|
|
||||||
|
|
||||||
# Set the truncation and padding strategy and restore the initial configuration
|
# Set the truncation and padding strategy and restore the initial configuration
|
||||||
self.set_truncation_and_padding(
|
self.set_truncation_and_padding(
|
||||||
padding_strategy=padding_strategy,
|
padding_strategy=padding_strategy,
|
||||||
@@ -453,12 +441,6 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
|||||||
verbose: bool = True,
|
verbose: bool = True,
|
||||||
**kwargs
|
**kwargs
|
||||||
) -> BatchEncoding:
|
) -> BatchEncoding:
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
batched_input = [(text, text_pair)] if text_pair else [text]
|
batched_input = [(text, text_pair)] if text_pair else [text]
|
||||||
batched_output = self._batch_encode_plus(
|
batched_output = self._batch_encode_plus(
|
||||||
|
|||||||
@@ -213,8 +213,6 @@ class Trainer:
|
|||||||
containing the optimizer and the scheduler to use. Will default to an instance of
|
containing the optimizer and the scheduler to use. Will default to an instance of
|
||||||
:class:`~transformers.AdamW` on your model and a scheduler given by
|
:class:`~transformers.AdamW` on your model and a scheduler given by
|
||||||
:func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
|
:func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
|
||||||
kwargs:
|
|
||||||
Deprecated keyword arguments.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -229,7 +227,6 @@ class Trainer:
|
|||||||
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
|
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
|
||||||
callbacks: Optional[List[TrainerCallback]] = None,
|
callbacks: Optional[List[TrainerCallback]] = None,
|
||||||
optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
|
optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
if args is None:
|
if args is None:
|
||||||
logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.")
|
logger.info("No `TrainingArguments` passed, using the current path as `output_dir`.")
|
||||||
@@ -262,27 +259,6 @@ class Trainer:
|
|||||||
self.callback_handler = CallbackHandler(callbacks, self.model, self.optimizer, self.lr_scheduler)
|
self.callback_handler = CallbackHandler(callbacks, self.model, self.optimizer, self.lr_scheduler)
|
||||||
self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)
|
self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)
|
||||||
|
|
||||||
# Deprecated arguments
|
|
||||||
if "tb_writer" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"Passing `tb_writer` as a keyword argument is deprecated and won't be possible in a "
|
|
||||||
+ "future version. Use `TensorBoardCallback(tb_writer=...)` instead and pass it to the `callbacks`"
|
|
||||||
+ "argument",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
tb_writer = kwargs.pop("tb_writer")
|
|
||||||
self.remove_callback(TensorBoardCallback)
|
|
||||||
self.add_callback(TensorBoardCallback(tb_writer=tb_writer))
|
|
||||||
if "prediction_loss_only" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a "
|
|
||||||
+ "future version. Use `args.prediction_loss_only` instead. Setting "
|
|
||||||
+ f"`args.prediction_loss_only={kwargs['prediction_loss_only']}",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
# Will be set to True by `self._setup_loggers()` on first call to `self.log()`.
|
# Will be set to True by `self._setup_loggers()` on first call to `self.log()`.
|
||||||
self._loggers_initialized = False
|
self._loggers_initialized = False
|
||||||
|
|
||||||
@@ -294,14 +270,7 @@ class Trainer:
|
|||||||
# We'll find a more elegant and not need to do this in the future.
|
# We'll find a more elegant and not need to do this in the future.
|
||||||
self.model.config.xla_device = True
|
self.model.config.xla_device = True
|
||||||
if not callable(self.data_collator) and callable(getattr(self.data_collator, "collate_batch", None)):
|
if not callable(self.data_collator) and callable(getattr(self.data_collator, "collate_batch", None)):
|
||||||
self.data_collator = self.data_collator.collate_batch
|
raise ValueError("The `data_collator` should be a simple callable (function, class with `__call__`).")
|
||||||
warnings.warn(
|
|
||||||
(
|
|
||||||
"The `data_collator` should now be a simple callable (function, class with `__call__`), classes "
|
|
||||||
+ "with a `collate_batch` are deprecated and won't be supported in a future version."
|
|
||||||
),
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.max_steps > 0:
|
if args.max_steps > 0:
|
||||||
logger.info("max_steps is given, it will override any value given in num_train_epochs")
|
logger.info("max_steps is given, it will override any value given in num_train_epochs")
|
||||||
@@ -1050,12 +1019,6 @@ class Trainer:
|
|||||||
logs (:obj:`Dict[str, float]`):
|
logs (:obj:`Dict[str, float]`):
|
||||||
The values to log.
|
The values to log.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_log"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._log(logs)
|
|
||||||
if self.state.epoch is not None:
|
if self.state.epoch is not None:
|
||||||
logs["epoch"] = self.state.epoch
|
logs["epoch"] = self.state.epoch
|
||||||
|
|
||||||
@@ -1095,12 +1058,6 @@ class Trainer:
|
|||||||
Return:
|
Return:
|
||||||
:obj:`torch.Tensor`: The tensor with training loss on this batch.
|
:obj:`torch.Tensor`: The tensor with training loss on this batch.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_training_step"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_training_step` method is deprecated and won't be called in a future version, define `training_step` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._training_step(model, inputs, self.optimizer)
|
|
||||||
|
|
||||||
model.train()
|
model.train()
|
||||||
inputs = self._prepare_inputs(inputs)
|
inputs = self._prepare_inputs(inputs)
|
||||||
@@ -1140,18 +1097,6 @@ class Trainer:
|
|||||||
# We don't use .loss here since the model may return tuples instead of ModelOutput.
|
# We don't use .loss here since the model may return tuples instead of ModelOutput.
|
||||||
return outputs[0]
|
return outputs[0]
|
||||||
|
|
||||||
def is_local_master(self) -> bool:
|
|
||||||
"""
|
|
||||||
Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several
|
|
||||||
machines) main process.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
This method is deprecated, use :meth:`~transformers.Trainer.is_local_process_zero` instead.
|
|
||||||
"""
|
|
||||||
warnings.warn("This method is deprecated, use `Trainer.is_local_process_zero()` instead.", FutureWarning)
|
|
||||||
return self.is_local_process_zero()
|
|
||||||
|
|
||||||
def is_local_process_zero(self) -> bool:
|
def is_local_process_zero(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several
|
Whether or not this process is the local (e.g., on one machine if training in a distributed fashion on several
|
||||||
@@ -1162,18 +1107,6 @@ class Trainer:
|
|||||||
else:
|
else:
|
||||||
return self.args.local_rank in [-1, 0]
|
return self.args.local_rank in [-1, 0]
|
||||||
|
|
||||||
def is_world_master(self) -> bool:
|
|
||||||
"""
|
|
||||||
Whether or not this process is the global main process (when training in a distributed fashion on several
|
|
||||||
machines, this is only going to be :obj:`True` for one process).
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
This method is deprecated, use :meth:`~transformers.Trainer.is_world_process_zero` instead.
|
|
||||||
"""
|
|
||||||
warnings.warn("This method is deprecated, use `Trainer.is_world_process_zero()` instead.", FutureWarning)
|
|
||||||
return self.is_world_process_zero()
|
|
||||||
|
|
||||||
def is_world_process_zero(self) -> bool:
|
def is_world_process_zero(self) -> bool:
|
||||||
"""
|
"""
|
||||||
Whether or not this process is the global main process (when training in a distributed fashion on several
|
Whether or not this process is the global main process (when training in a distributed fashion on several
|
||||||
@@ -1362,13 +1295,6 @@ class Trainer:
|
|||||||
|
|
||||||
Works both with or without labels.
|
Works both with or without labels.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_prediction_loop"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)
|
|
||||||
|
|
||||||
if not isinstance(dataloader.dataset, collections.abc.Sized):
|
if not isinstance(dataloader.dataset, collections.abc.Sized):
|
||||||
raise ValueError("dataset must implement __len__")
|
raise ValueError("dataset must implement __len__")
|
||||||
prediction_loss_only = (
|
prediction_loss_only = (
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from typing import Callable, Dict, Optional, Tuple
|
from typing import Callable, Dict, Optional, Tuple
|
||||||
|
|
||||||
|
|
||||||
@@ -66,8 +65,6 @@ class TFTrainer:
|
|||||||
:class:`~transformers.AdamWeightDecay`. The scheduler will default to an instance of
|
:class:`~transformers.AdamWeightDecay`. The scheduler will default to an instance of
|
||||||
:class:`tf.keras.optimizers.schedules.PolynomialDecay` if :obj:`args.num_warmup_steps` is 0 else an
|
:class:`tf.keras.optimizers.schedules.PolynomialDecay` if :obj:`args.num_warmup_steps` is 0 else an
|
||||||
instance of :class:`~transformers.WarmUp`.
|
instance of :class:`~transformers.WarmUp`.
|
||||||
kwargs:
|
|
||||||
Deprecated keyword arguments.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -82,7 +79,6 @@ class TFTrainer:
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
**kwargs,
|
|
||||||
):
|
):
|
||||||
assert parse(tf.__version__).release >= (2, 2, 0), (
|
assert parse(tf.__version__).release >= (2, 2, 0), (
|
||||||
"You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
|
"You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
|
||||||
@@ -98,13 +94,6 @@ class TFTrainer:
|
|||||||
self.gradient_accumulator = GradientAccumulator()
|
self.gradient_accumulator = GradientAccumulator()
|
||||||
self.global_step = 0
|
self.global_step = 0
|
||||||
self.epoch_logging = 0
|
self.epoch_logging = 0
|
||||||
if "prediction_loss_only" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a future version. Use `args.prediction_loss_only` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
|
|
||||||
assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
|
|
||||||
|
|
||||||
if tb_writer is not None:
|
if tb_writer is not None:
|
||||||
self.tb_writer = tb_writer
|
self.tb_writer = tb_writer
|
||||||
@@ -249,12 +238,6 @@ class TFTrainer:
|
|||||||
WANDB_DISABLED:
|
WANDB_DISABLED:
|
||||||
(Optional): boolean - defaults to false, set to "true" to disable wandb entirely.
|
(Optional): boolean - defaults to false, set to "true" to disable wandb entirely.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_setup_wandb"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_setup_wandb` method is deprecated and won't be called in a future version, define `setup_wandb` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._setup_wandb()
|
|
||||||
|
|
||||||
logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"')
|
logger.info('Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"')
|
||||||
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
combined_dict = {**self.model.config.to_dict(), **self.args.to_sanitized_dict()}
|
||||||
@@ -304,14 +287,6 @@ class TFTrainer:
|
|||||||
|
|
||||||
Works both with or without labels.
|
Works both with or without labels.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_prediction_loop"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._prediction_loop(
|
|
||||||
dataset, steps, num_examples, description, prediction_loss_only=prediction_loss_only
|
|
||||||
)
|
|
||||||
|
|
||||||
prediction_loss_only = (
|
prediction_loss_only = (
|
||||||
prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
|
prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
|
||||||
@@ -393,12 +368,6 @@ class TFTrainer:
|
|||||||
logs (:obj:`Dict[str, float]`):
|
logs (:obj:`Dict[str, float]`):
|
||||||
The values to log.
|
The values to log.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_log"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_log` method is deprecated and won't be called in a future version, define `log` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._log(logs)
|
|
||||||
logs["epoch"] = self.epoch_logging
|
logs["epoch"] = self.epoch_logging
|
||||||
|
|
||||||
if self.tb_writer:
|
if self.tb_writer:
|
||||||
@@ -733,12 +702,6 @@ class TFTrainer:
|
|||||||
Returns:
|
Returns:
|
||||||
A tuple of two :obj:`tf.Tensor`: The loss and logits.
|
A tuple of two :obj:`tf.Tensor`: The loss and logits.
|
||||||
"""
|
"""
|
||||||
if hasattr(self, "_run_model"):
|
|
||||||
warnings.warn(
|
|
||||||
"The `_run_model` method is deprecated and won't be called in a future version, define `run_model` in your subclass.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
return self._run_model(features, labels, training)
|
|
||||||
|
|
||||||
if self.args.past_index >= 0 and getattr(self, "_past", None) is not None:
|
if self.args.past_index >= 0 and getattr(self, "_past", None) is not None:
|
||||||
features["mems"] = self._past
|
features["mems"] = self._past
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import warnings
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
@@ -198,10 +197,6 @@ class TrainingArguments:
|
|||||||
do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
|
do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
|
||||||
do_eval: bool = field(default=None, metadata={"help": "Whether to run eval on the dev set."})
|
do_eval: bool = field(default=None, metadata={"help": "Whether to run eval on the dev set."})
|
||||||
do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
|
do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
|
||||||
evaluate_during_training: bool = field(
|
|
||||||
default=False,
|
|
||||||
metadata={"help": "Run evaluation during training at each logging step."},
|
|
||||||
)
|
|
||||||
evaluation_strategy: EvaluationStrategy = field(
|
evaluation_strategy: EvaluationStrategy = field(
|
||||||
default="no",
|
default="no",
|
||||||
metadata={"help": "Run evaluation during training at each logging step."},
|
metadata={"help": "Run evaluation during training at each logging step."},
|
||||||
@@ -340,12 +335,6 @@ class TrainingArguments:
|
|||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.disable_tqdm is None:
|
if self.disable_tqdm is None:
|
||||||
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
|
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
|
||||||
if self.evaluate_during_training is True:
|
|
||||||
self.evaluation_strategy = EvaluationStrategy.STEPS
|
|
||||||
warnings.warn(
|
|
||||||
"The `evaluate_during_training` argument is deprecated in favor of `evaluation_strategy` (which has more options)",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy)
|
self.evaluation_strategy = EvaluationStrategy(self.evaluation_strategy)
|
||||||
if self.do_eval is False and self.evaluation_strategy != EvaluationStrategy.NO:
|
if self.do_eval is False and self.evaluation_strategy != EvaluationStrategy.NO:
|
||||||
self.do_eval = True
|
self.do_eval = True
|
||||||
|
|||||||
@@ -73,7 +73,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
|
|||||||
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
|
||||||
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
|
||||||
import warnings
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from tokenizers import ByteLevelBPETokenizer
|
from tokenizers import ByteLevelBPETokenizer
|
||||||
@@ -234,13 +233,6 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(PreTrainedTokenizer):
|
|||||||
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
|
||||||
|
|
||||||
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
def prepare_for_tokenization(self, text, is_split_into_words=False, **kwargs):
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space)
|
||||||
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
if (is_split_into_words or add_prefix_space) and (len(text) > 0 and not text[0].isspace()):
|
||||||
text = " " + text
|
text = " " + text
|
||||||
@@ -285,29 +277,6 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
|
|||||||
)
|
)
|
||||||
self.add_prefix_space = add_prefix_space
|
self.add_prefix_space = add_prefix_space
|
||||||
|
|
||||||
def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
|
||||||
is_split_into_words = None
|
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.pop("is_pretokenized")
|
|
||||||
|
|
||||||
is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words
|
|
||||||
return super()._batch_encode_plus(*args, **kwargs)
|
|
||||||
|
|
||||||
def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
|
|
||||||
is_split_into_words = None
|
|
||||||
if "is_pretokenized" in kwargs:
|
|
||||||
warnings.warn(
|
|
||||||
"`is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.",
|
|
||||||
FutureWarning,
|
|
||||||
)
|
|
||||||
is_split_into_words = kwargs.get("is_split_into_words", False) if is_split_into_words is None else is_split_into_words
|
|
||||||
return super()._encode_plus(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
||||||
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
|
||||||
if token_ids_1 is None:
|
if token_ids_1 is None:
|
||||||
|
|||||||
@@ -213,7 +213,9 @@ class GPT2ModelTester:
|
|||||||
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
||||||
|
|
||||||
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
||||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
|
output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[
|
||||||
|
"last_hidden_state"
|
||||||
|
]
|
||||||
|
|
||||||
# select random slice
|
# select random slice
|
||||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||||
@@ -255,7 +257,7 @@ class GPT2ModelTester:
|
|||||||
|
|
||||||
# get two different outputs
|
# get two different outputs
|
||||||
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
|
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
|
||||||
output_from_past = model(next_tokens, past=past, attention_mask=attn_mask)["last_hidden_state"]
|
output_from_past = model(next_tokens, past_key_values=past, attention_mask=attn_mask)["last_hidden_state"]
|
||||||
|
|
||||||
# select random slice
|
# select random slice
|
||||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||||
@@ -286,7 +288,9 @@ class GPT2ModelTester:
|
|||||||
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
|
||||||
|
|
||||||
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
|
||||||
output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"]
|
output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[
|
||||||
|
"last_hidden_state"
|
||||||
|
]
|
||||||
self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1])
|
self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1])
|
||||||
|
|
||||||
# select random slice
|
# select random slice
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
from transformers.testing_utils import require_tf, require_torch, slow
|
from transformers.testing_utils import require_tf, require_torch, slow
|
||||||
|
|
||||||
@@ -53,13 +51,6 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
|
|||||||
]
|
]
|
||||||
expected_check_keys = ["sequence"]
|
expected_check_keys = ["sequence"]
|
||||||
|
|
||||||
@require_torch
|
|
||||||
def test_torch_topk_deprecation(self):
|
|
||||||
# At pipeline initialization only it was not enabled at pipeline
|
|
||||||
# call site before
|
|
||||||
with pytest.warns(FutureWarning, match=r".*use `top_k`.*"):
|
|
||||||
pipeline(task="fill-mask", model=self.small_models[0], topk=1)
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_torch_fill_mask(self):
|
def test_torch_fill_mask(self):
|
||||||
valid_inputs = "My name is <mask>"
|
valid_inputs = "My name is <mask>"
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ class AutoTokenizerTest(unittest.TestCase):
|
|||||||
else:
|
else:
|
||||||
self.assertEqual(tokenizer.do_lower_case, False)
|
self.assertEqual(tokenizer.do_lower_case, False)
|
||||||
|
|
||||||
self.assertEqual(tokenizer.max_len, 512)
|
self.assertEqual(tokenizer.model_max_length, 512)
|
||||||
|
|
||||||
@require_tokenizers
|
@require_tokenizers
|
||||||
def test_tokenizer_identifier_non_existent(self):
|
def test_tokenizer_identifier_non_existent(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user