Big file_utils cleanup (#16396)
* Big file_utils cleanup
* This one still needs to be treated separately
This commit is contained in:
@@ -535,7 +535,7 @@ to make your debugging environment as efficient as possible.
|
||||
due to multiple dropout layers in the model. Make sure that the
|
||||
forward pass in your debugging environment is **deterministic** so
|
||||
that the dropout layers are not used. Or use
|
||||
`transformers.file_utils.set_seed` if the old and new
|
||||
`transformers.utils.set_seed` if the old and new
|
||||
implementations are in the same framework.
|
||||
|
||||
#### More details on how to create a debugging environment for [camelcase name of model]
|
||||
|
||||
@@ -119,7 +119,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
|
||||
- 0 indicates the head is **masked**.
|
||||
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
|
||||
"""
|
||||
|
||||
@@ -1244,7 +1244,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
|
||||
@@ -1275,7 +1275,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
{{cookiecutter.uppercase_modelname}}_DECODE_INPUTS_DOCSTRING = r"""
|
||||
@@ -1322,7 +1322,7 @@ _TOKENIZER_FOR_DOC = "{{cookiecutter.camelcase_modelname}}Tokenizer"
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
|
||||
|
||||
@@ -925,7 +925,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
|
||||
more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
|
||||
used instead.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
|
||||
argument can be used in eager mode, in graph mode the value will always be set to True.
|
||||
training (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
@@ -2338,7 +2338,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
|
||||
more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
|
||||
used instead.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
|
||||
argument can be used in eager mode, in graph mode the value will always be set to True.
|
||||
training (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
@@ -2429,7 +2429,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
|
||||
for more detail. This argument can be used only in eager mode, in graph mode the value in the config
|
||||
will be used instead.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
|
||||
argument can be used in eager mode, in graph mode the value will always be set to True.
|
||||
training (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
@@ -2626,7 +2626,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
|
||||
for more detail. This argument can be used only in eager mode, in graph mode the value in the config
|
||||
will be used instead.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
|
||||
argument can be used in eager mode, in graph mode the value will always be set to True.
|
||||
training (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to use the model in training mode (some modules like dropout modules have different
|
||||
|
||||
@@ -746,7 +746,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
|
||||
@@ -2157,7 +2157,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
|
||||
@@ -2186,7 +2186,7 @@ class {{cookiecutter.camelcase_modelname}}PreTrainedModel(PreTrainedModel):
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
||||
more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
|
||||
|
||||
@@ -2272,7 +2272,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
|
||||
for more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -2494,7 +2494,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
|
||||
for more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
"""
|
||||
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
||||
output_hidden_states = (
|
||||
@@ -3270,7 +3270,7 @@ class {{cookiecutter.camelcase_modelname}}ForCausalLM({{cookiecutter.camelcase_m
|
||||
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
|
||||
for more detail.
|
||||
return_dict (`bool`, *optional*):
|
||||
Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
|
||||
Returns:
|
||||
|
||||
|
||||
@@ -532,7 +532,7 @@ to make your debugging environment as efficient as possible.
|
||||
due to multiple dropout layers in the model. Make sure that the
|
||||
forward pass in your debugging environment is **deterministic** so
|
||||
that the dropout layers are not used. Or use
|
||||
`transformers.file_utils.set_seed` if the old and new
|
||||
`transformers.utils.set_seed` if the old and new
|
||||
implementations are in the same framework.
|
||||
|
||||
#### (Important) More details on how to create a debugging environment for BigBird
|
||||
|
||||
Reference in New Issue
Block a user