From 9234caefb0241939f7b2b0ee3d73ed5ebf842ae9 Mon Sep 17 00:00:00 2001 From: Akshar Goyal Date: Tue, 31 Oct 2023 06:20:14 -0400 Subject: [PATCH] [docstring] Fix docstring for AltCLIPTextConfig, AltCLIPVisionConfig and AltCLIPConfig (#27128) * [docstring] Fix docstring for AltCLIPVisionConfig, AltCLIPTextConfig + cleaned some docstring * Removed entries from check_docstring.py * Removed entries from check_docstring.py * Removed entry from check_docstring.py * [docstring] Fix docstring for AltCLIPTextConfig, AltCLIPVisionConfig and AltCLIPConfig --- .../models/altclip/configuration_altclip.py | 19 +++++++++++++++---- .../bridgetower/configuration_bridgetower.py | 6 +++--- .../configuration_chinese_clip.py | 2 +- .../models/clipseg/configuration_clipseg.py | 2 +- .../mask2former/configuration_mask2former.py | 2 +- .../models/owlv2/configuration_owlv2.py | 2 +- .../models/owlvit/configuration_owlvit.py | 2 +- .../pix2struct/configuration_pix2struct.py | 2 +- .../models/x_clip/configuration_x_clip.py | 4 ++-- utils/check_docstrings.py | 2 -- 10 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/transformers/models/altclip/configuration_altclip.py b/src/transformers/models/altclip/configuration_altclip.py index 431c61565b..0320064520 100755 --- a/src/transformers/models/altclip/configuration_altclip.py +++ b/src/transformers/models/altclip/configuration_altclip.py @@ -61,12 +61,19 @@ class AltCLIPTextConfig(PretrainedConfig): max_position_embeddings (`int`, *optional*, defaults to 514): The maximum sequence length that this model might ever be used with. Typically set this to something large just in case (e.g., 512 or 1024 or 2048). - type_vocab_size (`int`, *optional*, defaults to 2): + type_vocab_size (`int`, *optional*, defaults to 1): The vocabulary size of the `token_type_ids` passed when calling [`AltCLIPTextModel`] initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - layer_norm_eps (`float`, *optional*, defaults to 1e-5): + initializer_factor (`float`, *optional*, defaults to 0.02): + A factor for initializing all weight matrices (should be kept to 1, used internally for initialization + testing). + layer_norm_eps (`float`, *optional*, defaults to 1e-05): The epsilon used by the layer normalization layers. + pad_token_id (`int`, *optional*, defaults to 1): The id of the *padding* token. + bos_token_id (`int`, *optional*, defaults to 0): The id of the *beginning-of-sequence* token. + eos_token_id (`Union[int, List[int]]`, *optional*, defaults to 2): + The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens. position_embedding_type (`str`, *optional*, defaults to `"absolute"`): Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to @@ -154,10 +161,14 @@ class AltCLIPVisionConfig(PretrainedConfig): Dimensionality of the encoder layers and the pooler layer. intermediate_size (`int`, *optional*, defaults to 3072): Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + projection_dim (`int`, *optional*, defaults to 512): + Dimentionality of text and vision projection layers. num_hidden_layers (`int`, *optional*, defaults to 12): Number of hidden layers in the Transformer encoder. num_attention_heads (`int`, *optional*, defaults to 12): Number of attention heads for each attention layer in the Transformer encoder. + num_channels (`int`, *optional*, defaults to 3): + The number of input channels. image_size (`int`, *optional*, defaults to 224): The size (resolution) of each image. patch_size (`int`, *optional*, defaults to 32): @@ -165,13 +176,13 @@ class AltCLIPVisionConfig(PretrainedConfig): hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`): The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported. - layer_norm_eps (`float`, *optional*, defaults to 1e-5): + layer_norm_eps (`float`, *optional*, defaults to 1e-05): The epsilon used by the layer normalization layers. attention_dropout (`float`, *optional*, defaults to 0.0): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). diff --git a/src/transformers/models/bridgetower/configuration_bridgetower.py b/src/transformers/models/bridgetower/configuration_bridgetower.py index 30b6bf2879..e44373cba5 100644 --- a/src/transformers/models/bridgetower/configuration_bridgetower.py +++ b/src/transformers/models/bridgetower/configuration_bridgetower.py @@ -49,7 +49,7 @@ class BridgeTowerVisionConfig(PretrainedConfig): The size (resolution) of each patch. image_size (`int`, *optional*, defaults to 288): The size (resolution) of each image. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). layer_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -151,7 +151,7 @@ class BridgeTowerTextConfig(PretrainedConfig): just in case (e.g., 512 or 1024 or 2048). type_vocab_size (`int`, *optional*, defaults to 2): The vocabulary size of the `token_type_ids`. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). layer_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -255,7 +255,7 @@ class BridgeTowerConfig(PretrainedConfig): The non-linear activation function (function or string) in the encoder and pooler. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). layer_norm_eps (`float`, *optional*, defaults to 1e-05): diff --git a/src/transformers/models/chinese_clip/configuration_chinese_clip.py b/src/transformers/models/chinese_clip/configuration_chinese_clip.py index 7bcfc73799..0e91200ce6 100644 --- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py +++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py @@ -203,7 +203,7 @@ class ChineseCLIPVisionConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1.0): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). Example: diff --git a/src/transformers/models/clipseg/configuration_clipseg.py b/src/transformers/models/clipseg/configuration_clipseg.py index 56b90f721e..cb178514b2 100644 --- a/src/transformers/models/clipseg/configuration_clipseg.py +++ b/src/transformers/models/clipseg/configuration_clipseg.py @@ -62,7 +62,7 @@ class CLIPSegTextConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1.0): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). pad_token_id (`int`, *optional*, defaults to 1): diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index ccc1c9c2cf..1fe2414739 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -93,7 +93,7 @@ class Mask2FormerConfig(PretrainedConfig): Ratio of points that are sampled via importance sampling. init_std (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - init_xavier_std (`float``, *optional*, defaults to 1.0): + init_xavier_std (`float`, *optional*, defaults to 1.0): The scaling factor used for the Xavier initialization gain in the HM Attention map module. use_auxiliary_loss (`boolean``, *optional*, defaults to `True`): If `True` [`Mask2FormerForUniversalSegmentationOutput`] will contain the auxiliary losses computed using diff --git a/src/transformers/models/owlv2/configuration_owlv2.py b/src/transformers/models/owlv2/configuration_owlv2.py index b4d7526128..54611c4daf 100644 --- a/src/transformers/models/owlv2/configuration_owlv2.py +++ b/src/transformers/models/owlv2/configuration_owlv2.py @@ -180,7 +180,7 @@ class Owlv2VisionConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1.0): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). diff --git a/src/transformers/models/owlvit/configuration_owlvit.py b/src/transformers/models/owlvit/configuration_owlvit.py index 424c4adc17..f9d3914ed8 100644 --- a/src/transformers/models/owlvit/configuration_owlvit.py +++ b/src/transformers/models/owlvit/configuration_owlvit.py @@ -183,7 +183,7 @@ class OwlViTVisionConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1.0): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). diff --git a/src/transformers/models/pix2struct/configuration_pix2struct.py b/src/transformers/models/pix2struct/configuration_pix2struct.py index feb5397a2a..c23d2cdcfe 100644 --- a/src/transformers/models/pix2struct/configuration_pix2struct.py +++ b/src/transformers/models/pix2struct/configuration_pix2struct.py @@ -203,7 +203,7 @@ class Pix2StructVisionConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 1e-10): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1.0): + initializer_factor (`float`, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). seq_len (`int`, *optional*, defaults to 4096): diff --git a/src/transformers/models/x_clip/configuration_x_clip.py b/src/transformers/models/x_clip/configuration_x_clip.py index 183b66439b..fbccdf1167 100644 --- a/src/transformers/models/x_clip/configuration_x_clip.py +++ b/src/transformers/models/x_clip/configuration_x_clip.py @@ -63,7 +63,7 @@ class XCLIPTextConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). @@ -176,7 +176,7 @@ class XCLIPVisionConfig(PretrainedConfig): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float`, *optional*, defaults to 1): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). drop_path_rate (`float`, *optional*, defaults to 0.0): diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 42223e586f..3d8a2881bf 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -79,8 +79,6 @@ OBJECTS_TO_IGNORE = [ "AlbertTokenizerFast", "AlignTextModel", "AlignVisionConfig", - "AltCLIPTextConfig", - "AltCLIPVisionConfig", "AudioClassificationPipeline", "AutoformerConfig", "AutomaticSpeechRecognitionPipeline",