From 816c2237c1fd5540c57f99b3faf6160a67a31505 Mon Sep 17 00:00:00 2001
From: Sparty <vignesh.crysis@gmail.com>
Date: Thu, 19 Oct 2023 04:52:14 -0400
Subject: [PATCH] [docstring] Fix docstring for `ChineseCLIP` (#26880)

* Remove ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig from check_docstrings

* Run fix_and_overwrite for ChineseCLIPImageProcessor, ChineseCLIPTextConfig, ChineseCLIPVisionConfig

* Replace <fill_type> and <fill_docstring> in configuration_chinese_clip.py, image_processing_chinese_clip.py with type and docstring values

---------

Co-authored-by: vignesh-raghunathan <vignesh_raghunathan@intuit.com>
---
 .../chinese_clip/configuration_chinese_clip.py      | 13 +++++++++++--
 .../chinese_clip/image_processing_chinese_clip.py   |  4 ++--
 utils/check_docstrings.py                           |  3 ---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/chinese_clip/configuration_chinese_clip.py b/src/transformers/models/chinese_clip/configuration_chinese_clip.py
index cbbf429e1b..7bcfc73799 100644
--- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py
@@ -75,8 +75,13 @@ class ChineseCLIPTextConfig(PretrainedConfig):
             The vocabulary size of the `token_type_ids` passed when calling [`ChineseCLIPModel`].
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        initializer_factor (`float`, *optional*, defaults to 1.0):
+            A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
+            testing).
         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
             The epsilon used by the layer normalization layers.
+        pad_token_id (`int`, *optional*, defaults to 0):
+            Padding token id.
         position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
             Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
             positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
@@ -177,10 +182,14 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
             Dimensionality of the encoder layers and the pooler layer.
         intermediate_size (`int`, *optional*, defaults to 3072):
             Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        projection_dim (`int`, *optional*, defaults to 512):
+            Dimentionality of text and vision projection layers.
         num_hidden_layers (`int`, *optional*, defaults to 12):
             Number of hidden layers in the Transformer encoder.
         num_attention_heads (`int`, *optional*, defaults to 12):
             Number of attention heads for each attention layer in the Transformer encoder.
+        num_channels (`int`, *optional*, defaults to 3):
+            The number of input channels.
         image_size (`int`, *optional*, defaults to 224):
             The size (resolution) of each image.
         patch_size (`int`, *optional*, defaults to 32):
@@ -188,13 +197,13 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
         hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
             The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
             `"relu"`, `"selu"` and `"gelu_new"` ``"quick_gelu"` are supported.
-        layer_norm_eps (`float`, *optional*, defaults to 1e-5):
+        layer_norm_eps (`float`, *optional*, defaults to 1e-05):
             The epsilon used by the layer normalization layers.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        initializer_factor (`float``, *optional*, defaults to 1):
+        initializer_factor (`float``, *optional*, defaults to 1.0):
             A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
             testing).
     Example:
diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
index 5f843ae5d8..4f1048a45e 100644
--- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
+++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py
@@ -59,7 +59,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
             Size of the image after resizing. The shortest edge of the image is resized to size["shortest_edge"], with
             the longest edge resized to keep the input aspect ratio. Can be overridden by `size` in the `preprocess`
             method.
-        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
+        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
             Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method.
         do_center_crop (`bool`, *optional*, defaults to `True`):
             Whether to center crop the image to the specified `crop_size`. Can be overridden by `do_center_crop` in the
@@ -73,7 +73,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor):
         rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
             Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
             method.
-        do_normalize:
+        do_normalize (`bool`, *optional*, defaults to `True`):
             Whether to normalize the image. Can be overridden by `do_normalize` in the `preprocess` method.
         image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
             Mean to use if normalizing the image. This is a float or list of floats the length of the number of
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
index 67a89193d4..0115f0ce40 100644
--- a/utils/check_docstrings.py
+++ b/utils/check_docstrings.py
@@ -121,10 +121,7 @@ OBJECTS_TO_IGNORE = [
     "CamembertTokenizerFast",
     "CanineModel",
     "CanineTokenizer",
-    "ChineseCLIPImageProcessor",
-    "ChineseCLIPTextConfig",
     "ChineseCLIPTextModel",
-    "ChineseCLIPVisionConfig",
     "ClapTextConfig",
     "CodeGenConfig",
     "CodeGenTokenizer",