From 6c78bbcb8320d316434262ef003251ca997db0d1 Mon Sep 17 00:00:00 2001 From: Sparty Date: Wed, 10 Jan 2024 12:20:39 -0500 Subject: [PATCH] [docstring] Fix docstring for ErnieConfig, ErnieMConfig (#27029) * Remove ErnieConfig, ErnieMConfig check_docstrings * Run fix_and_overwrite for ErnieConfig, ErnieMConfig * Replace the `fill_type` and `fill_docstring` placeholders in configuration_ernie, configuration_ernie_m.py with type and docstring values --------- Co-authored-by: vignesh-raghunathan --- .../models/ernie/configuration_ernie.py | 4 ++-- .../models/ernie_m/configuration_ernie_m.py | 17 ++++++++--------- utils/check_docstrings.py | 2 -- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/transformers/models/ernie/configuration_ernie.py b/src/transformers/models/ernie/configuration_ernie.py index 143fb8cc58..7278a74ece 100644 --- a/src/transformers/models/ernie/configuration_ernie.py +++ b/src/transformers/models/ernie/configuration_ernie.py @@ -81,14 +81,14 @@ class ErnieConfig(PretrainedConfig): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. layer_norm_eps (`float`, *optional*, defaults to 1e-12): The epsilon used by the layer normalization layers. + pad_token_id (`int`, *optional*, defaults to 0): + Padding token id. position_embedding_type (`str`, *optional*, defaults to `"absolute"`): Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155). For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658). - is_decoder (`bool`, *optional*, defaults to `False`): - Whether the model is used as a decoder or not. If `False`, the model is used as an encoder. 
use_cache (`bool`, *optional*, defaults to `True`): Whether or not the model should return the last key/values attentions (not used by all models). Only relevant if `config.is_decoder=True`. diff --git a/src/transformers/models/ernie_m/configuration_ernie_m.py b/src/transformers/models/ernie_m/configuration_ernie_m.py index eb7eaad837..85917dc828 100644 --- a/src/transformers/models/ernie_m/configuration_ernie_m.py +++ b/src/transformers/models/ernie_m/configuration_ernie_m.py @@ -61,19 +61,20 @@ class ErnieMConfig(PretrainedConfig): The dropout probability for all fully connected layers in the embeddings and encoder. attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1): The dropout probability used in `MultiHeadAttention` in all encoder layers to drop some attention target. - act_dropout (`float`, *optional*, defaults to 0.0): - This dropout probability is used in `ErnieMEncoderLayer` after activation. - max_position_embeddings (`int`, *optional*, defaults to 512): + max_position_embeddings (`int`, *optional*, defaults to 514): The maximum value of the dimensionality of position encoding, which dictates the maximum supported length of an input sequence. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the normal initializer for initializing all weight matrices. + pad_token_id (`int`, *optional*, defaults to 1): + Padding token id. + The index of the padding token in the token vocabulary. layer_norm_eps (`float`, *optional*, defaults to 1e-05): The epsilon used by the layer normalization layers. classifier_dropout (`float`, *optional*): The dropout ratio for the classification head. - initializer_range (`float`, *optional*, defaults to 0.02): - The standard deviation of the normal initializer for initializing all weight matrices. - pad_token_id(`int`, *optional*, defaults to 1): - The index of padding token in the token vocabulary. 
+ act_dropout (`float`, *optional*, defaults to 0.0): + This dropout probability is used in `ErnieMEncoderLayer` after activation. A normal_initializer initializes weight matrices as normal distributions. See `ErnieMPretrainedModel._init_weights()` for how weights are initialized in `ErnieMModel`. @@ -97,7 +98,6 @@ class ErnieMConfig(PretrainedConfig): pad_token_id: int = 1, layer_norm_eps: float = 1e-05, classifier_dropout=None, - is_decoder=False, act_dropout=0.0, **kwargs, ): @@ -114,5 +114,4 @@ class ErnieMConfig(PretrainedConfig): self.initializer_range = initializer_range self.layer_norm_eps = layer_norm_eps self.classifier_dropout = classifier_dropout - self.is_decoder = is_decoder self.act_dropout = act_dropout diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index b9a1f0fde1..cd6d0dc33f 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -166,8 +166,6 @@ OBJECTS_TO_IGNORE = [ "ElectraTokenizerFast", "EncoderDecoderModel", "EncoderRepetitionPenaltyLogitsProcessor", - "ErnieConfig", - "ErnieMConfig", "ErnieMModel", "ErnieModel", "ErnieMTokenizer",