[docstring] Fix docstring for ErnieConfig, ErnieMConfig (#27029)
* Remove ErnieConfig, ErnieMConfig from check_docstrings
* Run fix_and_overwrite for ErnieConfig, ErnieMConfig
* Replace <fill_type> and <fill_docstring> in configuration_ernie.py, configuration_ernie_m.py with type and docstring values

---------

Co-authored-by: vignesh-raghunathan <vignesh_raghunathan@intuit.com>
This commit is contained in:
@@ -81,14 +81,14 @@ class ErnieConfig(PretrainedConfig):
|
|||||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||||
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
|
layer_norm_eps (`float`, *optional*, defaults to 1e-12):
|
||||||
The epsilon used by the layer normalization layers.
|
The epsilon used by the layer normalization layers.
|
||||||
|
pad_token_id (`int`, *optional*, defaults to 0):
|
||||||
|
Padding token id.
|
||||||
position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
|
position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
|
||||||
Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
|
Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
|
||||||
positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
|
positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
|
||||||
[Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
|
[Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
|
||||||
For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
|
For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
|
||||||
with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
|
with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
|
||||||
is_decoder (`bool`, *optional*, defaults to `False`):
|
|
||||||
Whether the model is used as a decoder or not. If `False`, the model is used as an encoder.
|
|
||||||
use_cache (`bool`, *optional*, defaults to `True`):
|
use_cache (`bool`, *optional*, defaults to `True`):
|
||||||
Whether or not the model should return the last key/values attentions (not used by all models). Only
|
Whether or not the model should return the last key/values attentions (not used by all models). Only
|
||||||
relevant if `config.is_decoder=True`.
|
relevant if `config.is_decoder=True`.
|
||||||
|
|||||||
@@ -61,19 +61,20 @@ class ErnieMConfig(PretrainedConfig):
|
|||||||
The dropout probability for all fully connected layers in the embeddings and encoder.
|
The dropout probability for all fully connected layers in the embeddings and encoder.
|
||||||
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
|
attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
|
||||||
The dropout probability used in `MultiHeadAttention` in all encoder layers to drop some attention target.
|
The dropout probability used in `MultiHeadAttention` in all encoder layers to drop some attention target.
|
||||||
act_dropout (`float`, *optional*, defaults to 0.0):
|
max_position_embeddings (`int`, *optional*, defaults to 514):
|
||||||
This dropout probability is used in `ErnieMEncoderLayer` after activation.
|
|
||||||
max_position_embeddings (`int`, *optional*, defaults to 512):
|
|
||||||
The maximum value of the dimensionality of position encoding, which dictates the maximum supported length
|
The maximum value of the dimensionality of position encoding, which dictates the maximum supported length
|
||||||
of an input sequence.
|
of an input sequence.
|
||||||
|
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||||
|
The standard deviation of the normal initializer for initializing all weight matrices.
|
||||||
|
pad_token_id (`int`, *optional*, defaults to 1):
|
||||||
|
Padding token id, i.e. the index of the padding token in the token vocabulary.
|
||||||
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
|
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
|
||||||
The epsilon used by the layer normalization layers.
|
The epsilon used by the layer normalization layers.
|
||||||
classifier_dropout (`float`, *optional*):
|
classifier_dropout (`float`, *optional*):
|
||||||
The dropout ratio for the classification head.
|
The dropout ratio for the classification head.
|
||||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
act_dropout (`float`, *optional*, defaults to 0.0):
|
||||||
The standard deviation of the normal initializer for initializing all weight matrices.
|
This dropout probability is used in `ErnieMEncoderLayer` after activation.
|
||||||
pad_token_id(`int`, *optional*, defaults to 1):
|
|
||||||
The index of padding token in the token vocabulary.
|
|
||||||
|
|
||||||
A normal_initializer initializes weight matrices as normal distributions. See
|
A normal_initializer initializes weight matrices as normal distributions. See
|
||||||
`ErnieMPretrainedModel._init_weights()` for how weights are initialized in `ErnieMModel`.
|
`ErnieMPretrainedModel._init_weights()` for how weights are initialized in `ErnieMModel`.
|
||||||
@@ -97,7 +98,6 @@ class ErnieMConfig(PretrainedConfig):
|
|||||||
pad_token_id: int = 1,
|
pad_token_id: int = 1,
|
||||||
layer_norm_eps: float = 1e-05,
|
layer_norm_eps: float = 1e-05,
|
||||||
classifier_dropout=None,
|
classifier_dropout=None,
|
||||||
is_decoder=False,
|
|
||||||
act_dropout=0.0,
|
act_dropout=0.0,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
@@ -114,5 +114,4 @@ class ErnieMConfig(PretrainedConfig):
|
|||||||
self.initializer_range = initializer_range
|
self.initializer_range = initializer_range
|
||||||
self.layer_norm_eps = layer_norm_eps
|
self.layer_norm_eps = layer_norm_eps
|
||||||
self.classifier_dropout = classifier_dropout
|
self.classifier_dropout = classifier_dropout
|
||||||
self.is_decoder = is_decoder
|
|
||||||
self.act_dropout = act_dropout
|
self.act_dropout = act_dropout
|
||||||
|
|||||||
@@ -166,8 +166,6 @@ OBJECTS_TO_IGNORE = [
|
|||||||
"ElectraTokenizerFast",
|
"ElectraTokenizerFast",
|
||||||
"EncoderDecoderModel",
|
"EncoderDecoderModel",
|
||||||
"EncoderRepetitionPenaltyLogitsProcessor",
|
"EncoderRepetitionPenaltyLogitsProcessor",
|
||||||
"ErnieConfig",
|
|
||||||
"ErnieMConfig",
|
|
||||||
"ErnieMModel",
|
"ErnieMModel",
|
||||||
"ErnieModel",
|
"ErnieModel",
|
||||||
"ErnieMTokenizer",
|
"ErnieMTokenizer",
|
||||||
|
|||||||
Reference in New Issue
Block a user