From d1b14c9b548de34b6606946482946008622967db Mon Sep 17 00:00:00 2001 From: Jan Jitse Venselaar Date: Mon, 1 Feb 2021 17:17:50 +0100 Subject: [PATCH] Tensorflow doc changes on loss output size (#9922) * Change documentation to correctly specify loss tensor size * Change documentation to correct input format for labels * Corrected output size of loss tensor for sequence classifier, multiple choice model and question answering --- src/transformers/modeling_tf_outputs.py | 20 +++++++++---------- .../blenderbot/modeling_tf_blenderbot.py | 2 +- .../modeling_tf_blenderbot_small.py | 2 +- .../models/marian/modeling_tf_marian.py | 2 +- .../models/pegasus/modeling_tf_pegasus.py | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/transformers/modeling_tf_outputs.py b/src/transformers/modeling_tf_outputs.py index ed0133c984..4c98106e30 100644 --- a/src/transformers/modeling_tf_outputs.py +++ b/src/transformers/modeling_tf_outputs.py @@ -175,7 +175,7 @@ class TFCausalLMOutput(ModelOutput): Base class for causal language model (or autoregressive) outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss (for next-token prediction). logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -204,7 +204,7 @@ class TFCausalLMOutputWithPast(ModelOutput): Base class for causal language model (or autoregressive) outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss (for next-token prediction). logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -240,7 +240,7 @@ class TFMaskedLMOutput(ModelOutput): Base class for masked language models outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Masked language modeling (MLM) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -269,7 +269,7 @@ class TFSeq2SeqLMOutput(ModelOutput): Base class for sequence-to-sequence language models outputs. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`labels` is provided): Language modeling loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -321,7 +321,7 @@ class TFNextSentencePredictorOutput(ModelOutput): Base class for outputs of models predicting if two sentences are consecutive or not. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`next_sentence_label` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of non-masked labels, returned when :obj:`next_sentence_label` is provided): Next sentence prediction loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, 2)`): Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation @@ -351,7 +351,7 @@ class TFSequenceClassifierOutput(ModelOutput): Base class for outputs of sentence classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). @@ -432,7 +432,7 @@ class TFMultipleChoiceModelOutput(ModelOutput): Base class for outputs of multiple choice models. Args: - loss (:obj:`tf.Tensor` of shape `(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape `(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`): `num_choices` is the second dimension of the input tensors. (see `input_ids` above). @@ -463,7 +463,7 @@ class TFTokenClassifierOutput(ModelOutput): Base class for outputs of token classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) : + loss (:obj:`tf.Tensor` of shape :obj:`(n,)`, `optional`, where n is the number of unmasked labels, returned when ``labels`` is provided) : Classification loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`): Classification scores (before SoftMax). @@ -492,7 +492,7 @@ class TFQuestionAnsweringModelOutput(ModelOutput): Base class for outputs of question answering models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`start_positions` and :obj:`end_positions` are provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`): Span-start scores (before SoftMax). @@ -579,7 +579,7 @@ class TFSequenceClassifierOutputWithPast(ModelOutput): Base class for outputs of sentence classification models. Args: - loss (:obj:`tf.Tensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): + loss (:obj:`tf.Tensor` of shape :obj:`(batch_size, )`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index b5c7d80c7f..cb935a35f0 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -1322,7 +1322,7 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index beed93b227..ab7fb68bde 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -1297,7 +1297,7 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index dbc4c80016..e9b31d23a4 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -1314,7 +1314,7 @@ class TFMarianMTModel(TFMarianPreTrainedModel, TFCausalLanguageModelingLoss): **kwargs, ): r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``. diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 57908e223c..73f9e4e8d3 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -1328,7 +1328,7 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua **kwargs, ): """ - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + labels (:obj:`tf.tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): Labels for computing the masked language modeling loss. Indices should either be in ``[0, ..., config.vocab_size]`` or -100 (see ``input_ids`` docstring). Tokens with indices set to ``-100`` are ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``.