From a1720694a519ccd451a59970311d9e315ed710f9 Mon Sep 17 00:00:00 2001 From: Julien Plu Date: Wed, 27 Jan 2021 10:45:42 +0100 Subject: [PATCH] Remove a TF usage warning and rework the documentation (#9756) * Rework documentation * Update the template * Trigger CI * Restore the warning but with the TF logger * Update convbert doc --- src/transformers/modeling_tf_utils.py | 5 ++- .../models/albert/modeling_tf_albert.py | 9 ++-- .../models/bart/modeling_tf_bart.py | 9 ++-- .../models/bert/modeling_tf_bert.py | 9 ++-- .../blenderbot/modeling_tf_blenderbot.py | 33 +++++++++++---- .../modeling_tf_blenderbot_small.py | 33 +++++++++++---- .../models/convbert/modeling_tf_convbert.py | 9 ++-- .../models/ctrl/modeling_tf_ctrl.py | 9 ++-- .../distilbert/modeling_tf_distilbert.py | 9 ++-- .../models/dpr/modeling_tf_dpr.py | 26 ++++++++---- .../models/electra/modeling_tf_electra.py | 9 ++-- .../models/flaubert/modeling_tf_flaubert.py | 9 ++-- .../models/funnel/modeling_tf_funnel.py | 9 ++-- .../models/gpt2/modeling_tf_gpt2.py | 9 ++-- .../models/led/modeling_tf_led.py | 9 ++-- .../longformer/modeling_tf_longformer.py | 9 ++-- .../models/lxmert/modeling_tf_lxmert.py | 9 ++-- .../models/marian/modeling_tf_marian.py | 33 +++++++++++---- .../models/mbart/modeling_tf_mbart.py | 33 +++++++++++---- .../mobilebert/modeling_tf_mobilebert.py | 9 ++-- .../models/mpnet/modeling_tf_mpnet.py | 9 ++-- .../models/openai/modeling_tf_openai.py | 9 ++-- .../models/pegasus/modeling_tf_pegasus.py | 36 ++++++++++++---- .../models/roberta/modeling_tf_roberta.py | 9 ++-- src/transformers/models/t5/modeling_tf_t5.py | 9 ++-- .../transfo_xl/modeling_tf_transfo_xl.py | 9 ++-- .../models/xlm/modeling_tf_xlm.py | 9 ++-- .../models/xlnet/modeling_tf_xlnet.py | 9 ++-- ...tf_{{cookiecutter.lowercase_modelname}}.py | 42 +++++++++++++------ 29 files changed, 299 insertions(+), 131 deletions(-) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index ff40551da1..be463c0fb7 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -44,6 +44,7 @@ from .utils import logging logger = logging.get_logger(__name__) +tf_logger = tf.get_logger() class TFModelUtilsMixin: @@ -285,7 +286,7 @@ def booleans_processing(config, **kwargs): or kwargs["output_hidden_states"] is not None or ("use_cache" in kwargs and kwargs["use_cache"] is not None) ): - tf.print( + tf_logger.warn( "The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model." "They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`)." ) @@ -294,7 +295,7 @@ def booleans_processing(config, **kwargs): final_booleans["output_hidden_states"] = config.output_hidden_states if kwargs["return_dict"] is not None: - tf.print("The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.") + tf_logger.warn("The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.") final_booleans["return_dict"] = True if "use_cache" in kwargs: diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 108f55dcf6..54bb69eba4 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -802,12 +802,15 @@ ALBERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index ad72b06bd0..8c5e641a59 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -574,12 +574,15 @@ BART_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 01d51ddaa5..890b362ac4 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -881,12 +881,15 @@ BERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 669c211c56..cffc6095ad 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -571,12 +571,15 @@ BLENDERBOT_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -661,12 +664,18 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -854,12 +863,18 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 710697f58a..a1b5d26dbd 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -576,12 +576,15 @@ BLENDERBOT_SMALL_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -666,12 +669,18 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -859,12 +868,18 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index 650c13c129..fbd9ecb4f4 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -803,12 +803,15 @@ CONVBERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 8b29c7f4bc..81930b83fb 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -516,12 +516,15 @@ CTRL_INPUTS_DOCSTRING = r""" ``past``). output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 64786f3ed9..3a43976621 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -585,12 +585,15 @@ DISTILBERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 2d6e01a7b9..cc595b85cd 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -523,12 +523,18 @@ TF_DPR_ENCODERS_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ TF_DPR_READER_INPUTS_DOCSTRING = r""" @@ -556,14 +562,16 @@ TF_DPR_READER_INPUTS_DOCSTRING = r""" Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. This is useful if you want more control over how to convert :obj:`input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - output_attentions (:obj:`bool`, `optional`): - Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. output_hidden_states (:obj:`bool`, `optional`): - Whether or not to rturn the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index a75c406170..5198943f9b 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -773,12 +773,15 @@ ELECTRA_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index f24dfa7473..db30f188a8 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -156,12 +156,15 @@ FLAUBERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 819b553d3f..676b9b769a 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -1131,12 +1131,15 @@ FUNNEL_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 2012cbb0a6..6c6fcacb0a 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -552,12 +552,15 @@ GPT2_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index f4a0a52405..9896d0bc2e 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1451,12 +1451,15 @@ LED_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index c9f2838fb2..4f4e449c56 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -2007,12 +2007,15 @@ LONGFORMER_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 16b72f2466..0493ef5cdd 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -1015,12 +1015,15 @@ LXMERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 78fac03a5d..16ec9269ab 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -595,12 +595,15 @@ MARIAN_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -683,12 +686,18 @@ class TFMarianEncoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -872,12 +881,18 @@ class TFMarianDecoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 6f77248f70..8562b0eb17 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -552,12 +552,15 @@ MBART_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -672,12 +675,18 @@ class TFMBartEncoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -867,12 +876,18 @@ class TFMBartDecoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 4035151405..386dfbee6b 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -1009,12 +1009,15 @@ MOBILEBERT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index e1ff0ba701..33f90cfcf9 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -748,12 +748,15 @@ MPNET_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 6a725e116e..1c4729fc6c 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -485,12 +485,15 @@ OPENAI_GPT_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index b36e36c437..a218076d76 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -598,14 +598,20 @@ PEGASUS_INPUTS_DOCSTRING = r""" use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`): If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation + output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all + attention layers. See ``attentions`` under returned tensors for more detail. This argument can be used only + in eager mode, in graph mode the value in the config will be used instead. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -689,12 +695,18 @@ class TFPegasusEncoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -881,12 +893,18 @@ class TFPegasusDecoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index e8f3e18880..9580d6b02c 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -784,12 +784,15 @@ ROBERTA_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 50c792fbc5..9e6b16bfc1 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -999,12 +999,15 @@ T5_INPUTS_DOCSTRING = r""" decoding (see :obj:`past_key_values`). output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 959e80d65b..6936030af0 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -828,12 +828,15 @@ TRANSFO_XL_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index 8cd3c7ef48..e7812846bf 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -669,12 +669,15 @@ XLM_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 8269bb4dda..56fc4ecd2d 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -1133,12 +1133,15 @@ XLNET_INPUTS_DOCSTRING = r""" vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index 49c31a5c3b..4aad02333f 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -778,12 +778,15 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel): vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -2016,12 +2019,15 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel): decoding (see :obj:`past_key_values`). Set to :obj:`False` during training, :obj:`True` during generation output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned - tensors for more detail. + tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the + config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for - more detail. + more detail. This argument can be used only in eager mode, in graph mode the value in the config will be + used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. training (:obj:`bool`, `optional`, defaults to :obj:`False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -2100,12 +2106,18 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call, @@ -2263,12 +2275,18 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer): into associated vectors than the model's internal embedding lookup matrix. output_attentions (:obj:`bool`, `optional`): Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under - returned tensors for more detail. + returned tensors for more detail. This argument can be used only in eager mode, in graph mode the value + in the config will be used instead. output_hidden_states (:obj:`bool`, `optional`): Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors - for more detail. + for more detail. This argument can be used only in eager mode, in graph mode the value in the config + will be used instead. return_dict (:obj:`bool`, `optional`): - Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. This + argument can be used in eager mode, in graph mode the value will always be set to True. + training (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not to use the model in training mode (some modules like dropout modules have different + behaviors between training and evaluation). """ inputs = input_processing( func=self.call,