From 816c2237c1fd5540c57f99b3faf6160a67a31505 Mon Sep 17 00:00:00 2001 From: Sparty Date: Thu, 19 Oct 2023 04:52:14 -0400 Subject: [PATCH] [docstring] Fix docstring for `ChineseCLIP` (#26880) * Remove ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig from check_docstrings * Run fix_and_overwrite for ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig * Replace `<fill_type>` and `<fill_docstring>` in configuration_chinese_clip.py, image_processing_chinese_clip.py with type and docstring values --------- Co-authored-by: vignesh-raghunathan --- .../chinese_clip/configuration_chinese_clip.py | 13 +++++++++++-- .../chinese_clip/image_processing_chinese_clip.py | 4 ++-- utils/check_docstrings.py | 3 --- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/chinese_clip/configuration_chinese_clip.py b/src/transformers/models/chinese_clip/configuration_chinese_clip.py index cbbf429e1b..7bcfc73799 100644 --- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py +++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py @@ -75,8 +75,13 @@ class ChineseCLIPTextConfig(PretrainedConfig): The vocabulary size of the `token_type_ids` passed when calling [`ChineseCLIPModel`]. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + initializer_factor (`float`, *optional*, defaults to 1.0): + A factor for initializing all weight matrices (should be kept to 1, used internally for initialization + testing). layer_norm_eps (`float`, *optional*, defaults to 1e-12): The epsilon used by the layer normalization layers. + pad_token_id (`int`, *optional*, defaults to 0): + Padding token id. position_embedding_type (`str`, *optional*, defaults to `"absolute"`): Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For positional embeddings use `"absolute"`. 
For more information on `"relative_key"`, please refer to @@ -177,10 +182,14 @@ class ChineseCLIPVisionConfig(PretrainedConfig): Dimensionality of the encoder layers and the pooler layer. intermediate_size (`int`, *optional*, defaults to 3072): Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + projection_dim (`int`, *optional*, defaults to 512): + Dimensionality of text and vision projection layers. num_hidden_layers (`int`, *optional*, defaults to 12): Number of hidden layers in the Transformer encoder. num_attention_heads (`int`, *optional*, defaults to 12): Number of attention heads for each attention layer in the Transformer encoder. + num_channels (`int`, *optional*, defaults to 3): + The number of input channels. image_size (`int`, *optional*, defaults to 224): The size (resolution) of each image. patch_size (`int`, *optional*, defaults to 32): @@ -188,13 +197,13 @@ class ChineseCLIPVisionConfig(PretrainedConfig): hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`): The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported. - layer_norm_eps (`float`, *optional*, defaults to 1e-5): + layer_norm_eps (`float`, *optional*, defaults to 1e-05): The epsilon used by the layer normalization layers. attention_dropout (`float`, *optional*, defaults to 0.0): The dropout ratio for the attention probabilities. initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - initializer_factor (`float``, *optional*, defaults to 1): + initializer_factor (`float``, *optional*, defaults to 1.0): A factor for initializing all weight matrices (should be kept to 1, used internally for initialization testing). 
Example: diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 5f843ae5d8..4f1048a45e 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -59,7 +59,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): Size of the image after resizing. The shortest edge of the image is resized to size["shortest_edge"], with the longest edge resized to keep the input aspect ratio. Can be overridden by `size` in the `preprocess` method. - resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`): + resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method. do_center_crop (`bool`, *optional*, defaults to `True`): Whether to center crop the image to the specified `crop_size`. Can be overridden by `do_center_crop` in the @@ -73,7 +73,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess` method. - do_normalize: + do_normalize (`bool`, *optional*, defaults to `True`): Whether to normalize the image. Can be overridden by `do_normalize` in the `preprocess` method. image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`): Mean to use if normalizing the image. 
This is a float or list of floats the length of the number of diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 67a89193d4..0115f0ce40 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -121,10 +121,7 @@ OBJECTS_TO_IGNORE = [ "CamembertTokenizerFast", "CanineModel", "CanineTokenizer", - "ChineseCLIPImageProcessor", - "ChineseCLIPTextConfig", "ChineseCLIPTextModel", - "ChineseCLIPVisionConfig", "ClapTextConfig", "CodeGenConfig", "CodeGenTokenizer",