From 0e9899f4511b63e0f96d89bfc312a082a203acf1 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Mon, 20 Jan 2020 16:57:54 -0500 Subject: [PATCH] Fixes --- docs/source/model_doc/camembert.rst | 14 +++++++------- docs/source/model_doc/ctrl.rst | 7 ++++--- src/transformers/modeling_albert.py | 6 +++--- src/transformers/modeling_bert.py | 13 +++++++------ src/transformers/modeling_ctrl.py | 4 ++-- src/transformers/modeling_distilbert.py | 19 +++++++++++-------- src/transformers/modeling_gpt2.py | 11 ++++++----- src/transformers/modeling_openai.py | 6 +++--- src/transformers/modeling_roberta.py | 6 +++--- src/transformers/modeling_tf_albert.py | 4 ++-- src/transformers/modeling_tf_bert.py | 16 ++++++++-------- src/transformers/modeling_tf_ctrl.py | 4 ++-- src/transformers/modeling_tf_distilbert.py | 14 +++++++------- src/transformers/modeling_tf_gpt2.py | 6 +++--- src/transformers/modeling_tf_openai.py | 6 +++--- src/transformers/modeling_tf_roberta.py | 8 ++++---- src/transformers/modeling_tf_transfo_xl.py | 4 ++-- src/transformers/modeling_tf_xlm.py | 8 ++++---- src/transformers/modeling_tf_xlnet.py | 10 +++++----- src/transformers/modeling_transfo_xl.py | 4 ++-- src/transformers/modeling_utils.py | 22 ++++++++++------------ src/transformers/modeling_xlm.py | 10 +++++----- src/transformers/modeling_xlnet.py | 14 +++++++------- 23 files changed, 110 insertions(+), 106 deletions(-) diff --git a/docs/source/model_doc/camembert.rst b/docs/source/model_doc/camembert.rst index 9272a569d3..58958dd2fd 100644 --- a/docs/source/model_doc/camembert.rst +++ b/docs/source/model_doc/camembert.rst @@ -10,49 +10,49 @@ It is a model trained on 138GB of French text. This implementation is the same as RoBERTa. ``CamembertConfig`` -~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertConfig :members: ``CamembertTokenizer`` -~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autoclass:: transformers.CamembertTokenizer :members: ``CamembertModel`` -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertModel :members: ``CamembertForMaskedLM`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertForMaskedLM :members: ``CamembertForSequenceClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertForSequenceClassification :members: ``CamembertForMultipleChoice`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertForMultipleChoice :members: ``CamembertForTokenClassification`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.CamembertForTokenClassification :members: diff --git a/docs/source/model_doc/ctrl.rst b/docs/source/model_doc/ctrl.rst index e358da0a4e..e83007d8dd 100644 --- a/docs/source/model_doc/ctrl.rst +++ b/docs/source/model_doc/ctrl.rst @@ -1,13 +1,14 @@ CTRL ---------------------------------------------------- -CTRL model was proposed in `CTRL: A Conditional Transformer Language Model for Controllable Generation`_ +CTRL model was proposed in `CTRL: A Conditional Transformer Language Model for Controllable Generation `_ by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher. It's a causal (unidirectional) transformer pre-trained using language modeling on a very large corpus of ~140 GB of text data with the first token reserved as a control code (such as Links, Books, Wikipedia etc.). -This model is a PyTorch `torch.nn.Module`_ sub-class. Use it as a regular PyTorch Module and -refer to the PyTorch documentation for all matter related to general usage and behavior. +This model is a PyTorch `torch.nn.Module `_ sub-class. 
+Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general +usage and behavior. Note: if you fine-tune a CTRL model using the Salesforce code (https://github.com/salesforce/ctrl), you'll be able to convert from TF to our HuggingFace/Transformers format using the diff --git a/src/transformers/modeling_albert.py b/src/transformers/modeling_albert.py index fc766d55e4..1a79b83590 100644 --- a/src/transformers/modeling_albert.py +++ b/src/transformers/modeling_albert.py @@ -494,7 +494,7 @@ class AlbertModel(AlbertPreTrainedModel): ): r""" Return: - :obj:`Tuple` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. pooler_output (:obj:`torch.FloatTensor`: of shape :obj:`(batch_size, hidden_size)`): @@ -726,7 +726,7 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel): If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy). Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: loss: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: Classification (or regression if config.num_labels==1) loss. logits ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)`` @@ -824,7 +824,7 @@ class AlbertForQuestionAnswering(AlbertPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. 
Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: loss: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_scores ``torch.FloatTensor`` of shape ``(batch_size, sequence_length,)`` diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index 8864494526..630ab3b62f 100644 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -546,8 +546,9 @@ class BertPreTrainedModel(PreTrainedModel): BERT_START_DOCSTRING = r""" - This model is a PyTorch `torch.nn.Module`_ sub-class. Use it as a regular PyTorch Module and - refer to the PyTorch documentation for all matter related to general usage and behavior. + This model is a PyTorch `torch.nn.Module `_ sub-class. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general + usage and behavior. Parameters: config (:class:`~transformers.BertConfig`): Model configuration class with all the parameters of the model. @@ -660,7 +661,7 @@ class BertModel(BertPreTrainedModel): ): r""" Return: - :obj:`Tuple` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. 
pooler_output (:obj:`torch.FloatTensor`: of shape :obj:`(batch_size, hidden_size)`): @@ -1218,7 +1219,7 @@ class BertForMultipleChoice(BertPreTrainedModel): of the input tensors. (see `input_ids` above) Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: loss (:obj:`torch.FloatTensor`` of shape ``(1,)`, `optional`, returned when :obj:`labels` is provided): Classification loss. classification_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`): @@ -1313,7 +1314,7 @@ class BertForTokenClassification(BertPreTrainedModel): Indices should be in ``[0, ..., config.num_labels - 1]``. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) : Classification loss. scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`) @@ -1409,7 +1410,7 @@ class BertForQuestionAnswering(BertPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. 
start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_ctrl.py b/src/transformers/modeling_ctrl.py index 5c7b623100..09ec688f4d 100644 --- a/src/transformers/modeling_ctrl.py +++ b/src/transformers/modeling_ctrl.py @@ -291,7 +291,7 @@ class CTRLModel(CTRLPreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`CTRLConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.CTRLConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. past (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): @@ -478,7 +478,7 @@ class CTRLLMHeadModel(CTRLPreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.CTRLConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.CTRLConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): diff --git a/src/transformers/modeling_distilbert.py b/src/transformers/modeling_distilbert.py index 7e6d09f686..5123b52ec2 100644 --- a/src/transformers/modeling_distilbert.py +++ b/src/transformers/modeling_distilbert.py @@ -351,6 +351,9 @@ class DistilBertPreTrainedModel(PreTrainedModel): DISTILBERT_START_DOCSTRING = r""" + This model is a PyTorch `torch.nn.Module `_ sub-class. 
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general + usage and behavior. Parameters: config (:class:`~transformers.DistilBertConfig`): Model configuration class with all the parameters of the model. @@ -416,7 +419,7 @@ class DistilBertModel(DistilBertPreTrainedModel): def forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None): r""" Return: - :obj:`Tuple` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -513,7 +516,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): in ``[0, ..., config.vocab_size]`` Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: loss (`optional`, returned when ``masked_lm_labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: Masked language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`) @@ -560,7 +563,7 @@ class DistilBertForMaskedLM(DistilBertPreTrainedModel): @add_start_docstrings( """DistilBert Model transformer with a sequence classification/regression head on top (a linear layer on top of - the pooled output) e.g. for GLUE tasks. """, + the pooled output) e.g. for GLUE tasks. 
""", DISTILBERT_START_DOCSTRING, ) class DistilBertForSequenceClassification(DistilBertPreTrainedModel): @@ -585,7 +588,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel): If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`): @@ -637,7 +640,7 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel): @add_start_docstrings( """DistilBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of - the hidden-states output to compute `span start logits` and `span end logits`). """, + the hidden-states output to compute `span start logits` and `span end logits`). """, DISTILBERT_START_DOCSTRING, ) class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): @@ -672,7 +675,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. 
start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): @@ -736,7 +739,7 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel): @add_start_docstrings( """DistilBert Model with a token classification head on top (a linear layer on top of - the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, + the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """, DISTILBERT_START_DOCSTRING, ) class DistilBertForTokenClassification(DistilBertPreTrainedModel): @@ -758,7 +761,7 @@ class DistilBertForTokenClassification(DistilBertPreTrainedModel): Indices should be in ``[0, ..., config.num_labels - 1]``. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) : Classification loss. scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`) diff --git a/src/transformers/modeling_gpt2.py b/src/transformers/modeling_gpt2.py index 728a676ea4..8ce82aa477 100644 --- a/src/transformers/modeling_gpt2.py +++ b/src/transformers/modeling_gpt2.py @@ -266,8 +266,9 @@ class GPT2PreTrainedModel(PreTrainedModel): GPT2_START_DOCSTRING = r""" - This model is a PyTorch `torch.nn.Module`_ sub-class. Use it as a regular PyTorch Module and - refer to the PyTorch documentation for all matter related to general usage and behavior. + This model is a PyTorch `torch.nn.Module `_ sub-class. + Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general + usage and behavior. Parameters: config (:class:`~transformers.GPT2Config`): Model configuration class with all the parameters of the model. 
@@ -362,7 +363,7 @@ class GPT2Model(GPT2PreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. past (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): @@ -547,7 +548,7 @@ class GPT2LMHeadModel(GPT2PreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): @@ -658,7 +659,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel): of the input tensors. (see `input_ids` above) Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: lm_loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``lm_labels`` is provided): Language modeling loss. 
mc_loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`multiple_choice_labels` is provided): diff --git a/src/transformers/modeling_openai.py b/src/transformers/modeling_openai.py index 45b2270e4c..d70db76f6c 100644 --- a/src/transformers/modeling_openai.py +++ b/src/transformers/modeling_openai.py @@ -375,7 +375,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -522,7 +522,7 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.OpenAIGPTConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): @@ -627,7 +627,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel): of the input tensors. 
(see `input_ids` above) Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.OpenAIGPTConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: lm_loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``lm_labels`` is provided): Language modeling loss. mc_loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`multiple_choice_labels` is provided): diff --git a/src/transformers/modeling_roberta.py b/src/transformers/modeling_roberta.py index cc2c7be158..8855ab3b43 100644 --- a/src/transformers/modeling_roberta.py +++ b/src/transformers/modeling_roberta.py @@ -401,7 +401,7 @@ class RobertaForMultipleChoice(BertPreTrainedModel): of the input tensors. (see `input_ids` above) Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: loss (:obj:`torch.FloatTensor`` of shape ``(1,)`, `optional`, returned when :obj:`labels` is provided): Classification loss. classification_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`): @@ -497,7 +497,7 @@ class RobertaForTokenClassification(BertPreTrainedModel): Indices should be in ``[0, ..., config.num_labels - 1]``. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.RobertaConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided) : Classification loss. 
scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`) @@ -616,7 +616,7 @@ class RobertaForQuestionAnswering(BertPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_tf_albert.py b/src/transformers/modeling_tf_albert.py index eaa001c39b..9211d6bbed 100644 --- a/src/transformers/modeling_tf_albert.py +++ b/src/transformers/modeling_tf_albert.py @@ -719,7 +719,7 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: prediction_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)` Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -776,7 +776,7 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.AlbertConfig`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs: logits (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`) Classification (or regression if config.num_labels==1) scores (before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index 61e20f0e39..e973cd3277 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -668,7 +668,7 @@ class TFBertModel(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. 
pooler_output (:obj:`tf.Tensor` of shape :obj:`(batch_size, hidden_size)`): @@ -724,7 +724,7 @@ class TFBertForPreTraining(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). seq_relationship_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, 2)`): @@ -780,7 +780,7 @@ class TFBertForMaskedLM(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: prediction_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -830,7 +830,7 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: seq_relationship_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, 2)`) Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -886,7 +886,7 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: logits (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -952,7 +952,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: classification_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`: `num_choices` is the size of the second dimension of the input tensors. (see `input_ids` above). @@ -1054,7 +1054,7 @@ class TFBertForTokenClassification(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`): Classification scores (before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -1111,7 +1111,7 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: start_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length,)`): Span-start scores (before SoftMax). 
end_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_tf_ctrl.py b/src/transformers/modeling_tf_ctrl.py index 863893ed12..8180a3352e 100644 --- a/src/transformers/modeling_tf_ctrl.py +++ b/src/transformers/modeling_tf_ctrl.py @@ -436,7 +436,7 @@ class TFCTRLModel(TFCTRLPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.CTRLConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. past (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): @@ -509,7 +509,7 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.CTRLConfig`) and inputs: prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
past (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): diff --git a/src/transformers/modeling_tf_distilbert.py b/src/transformers/modeling_tf_distilbert.py index 964b6169e1..1f7db7ced1 100644 --- a/src/transformers/modeling_tf_distilbert.py +++ b/src/transformers/modeling_tf_distilbert.py @@ -544,7 +544,7 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -618,7 +618,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel): r""" Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: prediction_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -658,7 +658,7 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel): @add_start_docstrings( """DistilBert Model transformer with a sequence classification/regression head on top (a linear layer on top of - the pooled output) e.g. for GLUE tasks. """, + the pooled output) e.g. for GLUE tasks. """, DISTILBERT_START_DOCSTRING, ) class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel): @@ -682,7 +682,7 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: logits (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -740,7 +740,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`): Classification scores (before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -777,7 +777,7 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel): @add_start_docstrings( """DistilBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of - the hidden-states output to compute `span start logits` and `span end logits`). """, + the hidden-states output to compute `span start logits` and `span end logits`). """, DISTILBERT_START_DOCSTRING, ) class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel): @@ -795,7 +795,7 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.DistilBertConfig`) and inputs: start_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length,)`): Span-start scores (before SoftMax). 
end_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_tf_gpt2.py b/src/transformers/modeling_tf_gpt2.py index d78e3f8f13..8ecbcfa339 100644 --- a/src/transformers/modeling_tf_gpt2.py +++ b/src/transformers/modeling_tf_gpt2.py @@ -452,7 +452,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. past (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): @@ -504,7 +504,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). past (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers` with each tensor of shape :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`): @@ -585,7 +585,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): Selected in the range ``[0, input_ids.size(-1) - 1[``. 
Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.GPT2Config`) and inputs: lm_prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). mc_prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`): diff --git a/src/transformers/modeling_tf_openai.py b/src/transformers/modeling_tf_openai.py index 36f6e36adb..69263639ec 100644 --- a/src/transformers/modeling_tf_openai.py +++ b/src/transformers/modeling_tf_openai.py @@ -435,7 +435,7 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. 
hidden_states (:obj:`tuple(tf.Tensor)` `optional`, returned when ``config.output_hidden_states=True``): @@ -483,7 +483,7 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -558,7 +558,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): Selected in the range ``[0, input_ids.size(-1) - 1[``. Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.OpenAIGPTConfig`) and inputs: lm_prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
mc_prediction_scores (:obj:`tf.Tensor` of shape :obj:`(batch_size, num_choices)`): diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index cba64e5990..572d8e8b5f 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -187,7 +187,7 @@ class TFRobertaModel(TFRobertaPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. pooler_output (:obj:`tf.Tensor` of shape :obj:`(batch_size, hidden_size)`): @@ -269,7 +269,7 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: prediction_scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -346,7 +346,7 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: logits (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): @@ -402,7 +402,7 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.RobertaConfig`) and inputs: scores (:obj:`Numpy array` or :obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`): Classification scores (before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`): diff --git a/src/transformers/modeling_tf_transfo_xl.py b/src/transformers/modeling_tf_transfo_xl.py index f05a6e5a5d..92b251ca42 100644 --- a/src/transformers/modeling_tf_transfo_xl.py +++ b/src/transformers/modeling_tf_transfo_xl.py @@ -695,7 +695,7 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.TransfoXLConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`): @@ -759,7 +759,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, labels=None, training=False): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.TransfoXLConfig`) and inputs: prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`): diff --git a/src/transformers/modeling_tf_xlm.py b/src/transformers/modeling_tf_xlm.py index 601f2d44e1..6dd2215ce8 100644 --- a/src/transformers/modeling_tf_xlm.py +++ b/src/transformers/modeling_tf_xlm.py @@ -579,7 +579,7 @@ class TFXLMModel(TFXLMPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -661,7 +661,7 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: prediction_scores (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -714,7 +714,7 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: logits (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). hidden_states (:obj:`tuple(tf.Tensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -768,7 +768,7 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: start_scores (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length,)`): Span-start scores (before SoftMax). 
end_scores (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_tf_xlnet.py b/src/transformers/modeling_tf_xlnet.py index f9c4bb18fa..a7cce9e350 100644 --- a/src/transformers/modeling_tf_xlnet.py +++ b/src/transformers/modeling_tf_xlnet.py @@ -788,7 +788,7 @@ class TFXLNetModel(TFXLNetPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: last_hidden_state (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. mems (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers`): @@ -841,7 +841,7 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: prediction_scores (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
mems (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers`): @@ -910,7 +910,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - `tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: logits (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). mems (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers`): @@ -970,7 +970,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel): def call(self, inputs, **kwargs): r""" Return: - `tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: logits (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:(batch_size, config.num_labels)`): Classification scores (before SoftMax). mems (:obj:`List[tf.Tensor]` of length :obj:`config.n_layers`): @@ -1028,7 +1028,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel): def call(self, inputs, **kwargs): r""" Returns: - :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(tf.Tensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. 
start_scores (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, sequence_length,)`): diff --git a/src/transformers/modeling_transfo_xl.py b/src/transformers/modeling_transfo_xl.py index b9d4553155..6fe846e448 100644 --- a/src/transformers/modeling_transfo_xl.py +++ b/src/transformers/modeling_transfo_xl.py @@ -673,7 +673,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel): def forward(self, input_ids=None, mems=None, head_mask=None, inputs_embeds=None): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.TransfoXLConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`): @@ -860,7 +860,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.TransfoXLConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. 
prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 098ccef4f2..381b0013f2 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -306,24 +306,22 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin): Parameters: pretrained_model_name_or_path: either: - - - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. - - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. - - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. - - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. - - None if you are both providing the configuration and state dictionary (resp. with keyword arguments ``config`` and ``state_dict``) + - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. + - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. + - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. + - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. 
This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. + - None if you are both providing the configuration and state dictionary (resp. with keyword arguments ``config`` and ``state_dict``) model_args: (`optional`) Sequence of positional arguments: All remaning positional arguments will be passed to the underlying model's ``__init__`` method config: (`optional`) one of: - - an instance of a class derived from :class:`~transformers.PretrainedConfig`, or - - a string valid as input to :func:`~transformers.PretrainedConfig.from_pretrained()` + - an instance of a class derived from :class:`~transformers.PretrainedConfig`, or + - a string valid as input to :func:`~transformers.PretrainedConfig.from_pretrained()` Configuration for the model to use instead of an automatically loaded configuation. Configuration can be automatically loaded when: - - - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or - - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. - - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. + - the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or + - the model was saved using :func:`~transformers.PreTrainedModel.save_pretrained` and is reloaded by suppling the save directory. + - the model is loaded by suppling a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory. state_dict: (`optional`) dict: an optional state dictionnary for the model to use instead of a state dictionary loaded from saved weights file. 
diff --git a/src/transformers/modeling_xlm.py b/src/transformers/modeling_xlm.py index 69b4d198aa..3fa046dfa5 100644 --- a/src/transformers/modeling_xlm.py +++ b/src/transformers/modeling_xlm.py @@ -416,7 +416,7 @@ class XLMModel(XLMPreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``config.output_hidden_states=True``): @@ -665,7 +665,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): @@ -747,7 +747,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel): If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.BertConfig`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`): @@ -844,7 +844,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): @@ -969,7 +969,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel): 1.0 means token should be masked. 0.0 mean token is not masked. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLMConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned if both :obj:`start_positions` and :obj:`end_positions` are provided): Classification loss as the sum of start token, end token (and is_impossible if provided) classification losses. 
start_top_log_probs (``torch.FloatTensor`` of shape ``(batch_size, config.start_n_top)``, `optional`, returned if ``start_positions`` or ``end_positions`` is not provided): diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py index 9d698423ec..077a1dfdc7 100644 --- a/src/transformers/modeling_xlnet.py +++ b/src/transformers/modeling_xlnet.py @@ -713,7 +713,7 @@ class XLNetModel(XLNetPreTrainedModel): ): r""" Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the last layer of the model. mems (:obj:`List[torch.FloatTensor]` of length :obj:`config.n_layers`): @@ -992,7 +992,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel): computed for labels in ``[0, ..., config.vocab_size]`` Return: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:obj:`~transformers.GPT2Config`) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape `(1,)`, `optional`, returned when ``labels`` is provided) Language modeling loss. prediction_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`): @@ -1090,7 +1090,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel): If ``config.num_labels > 1`` a classification loss is computed (Cross-Entropy). 
Return: - `tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`torch.FloatTensor` of shape :obj:(batch_size, config.num_labels)`): @@ -1188,7 +1188,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel): of the input tensors. (see `input_ids` above) Return: - `tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification loss. logits (:obj:`torch.FloatTensor` of shape :obj:(batch_size, config.num_labels)`): @@ -1288,7 +1288,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel): of the input tensors. (see `input_ids` above) Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor`` of shape ``(1,)`, `optional`, returned when :obj:`labels` is provided): Classification loss. classification_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`): @@ -1399,7 +1399,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel): Position outside of the sequence are not taken into account for computing the loss. 
Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Total span extraction loss is the sum of a Cross-Entropy for the start and end positions. start_scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`): @@ -1528,7 +1528,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel): 1.0 means token should be masked. 0.0 mean token is not masked. Returns: - :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (config) and inputs: + :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.XLNetConfig`) and inputs: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned if both :obj:`start_positions` and :obj:`end_positions` are provided): Classification loss as the sum of start token, end token (and is_impossible if provided) classification losses. start_top_log_probs (``torch.FloatTensor`` of shape ``(batch_size, config.start_n_top)``, `optional`, returned if ``start_positions`` or ``end_positions`` is not provided):