From 803475fb69097e802985eb4f85b52199c66a52de Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 27 Oct 2022 09:26:10 +0200
Subject: [PATCH] Add checkpoint links in a few config classes (#19910)

* For CLIP

* Others

* update

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 src/transformers/models/clip/configuration_clip.py         | 4 +++-
 src/transformers/models/groupvit/configuration_groupvit.py | 3 ++-
 src/transformers/models/owlvit/configuration_owlvit.py     | 3 ++-
 src/transformers/models/x_clip/configuration_x_clip.py     | 2 ++
 utils/check_config_docstrings.py                           | 4 ----
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/clip/configuration_clip.py b/src/transformers/models/clip/configuration_clip.py
index e71e87fad6..7dad241212 100644
--- a/src/transformers/models/clip/configuration_clip.py
+++ b/src/transformers/models/clip/configuration_clip.py
@@ -254,7 +254,9 @@ class CLIPVisionConfig(PretrainedConfig):
 class CLIPConfig(PretrainedConfig):
     r"""
     [`CLIPConfig`] is the configuration class to store the configuration of a [`CLIPModel`]. It is used to instantiate
-    CLIP model according to the specified arguments, defining the text model and vision model configs.
+    CLIP model according to the specified arguments, defining the text model and vision model configs. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the CLIP
+    [openai/clip-vit-base-patch32](https://huggingface.co/openai/clip-vit-base-patch32) architecture.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
diff --git a/src/transformers/models/groupvit/configuration_groupvit.py b/src/transformers/models/groupvit/configuration_groupvit.py
index efb37dbce8..7196f2250d 100644
--- a/src/transformers/models/groupvit/configuration_groupvit.py
+++ b/src/transformers/models/groupvit/configuration_groupvit.py
@@ -273,7 +273,8 @@ class GroupViTConfig(PretrainedConfig):
     r"""
     [`GroupViTConfig`] is the configuration class to store the configuration of a [`GroupViTModel`]. It is used to
     instantiate a GroupViT model according to the specified arguments, defining the text model and vision model
-    configs.
+    configs. Instantiating a configuration with the defaults will yield a similar configuration to that of the GroupViT
+    [nvidia/groupvit-gcc-yfcc](https://huggingface.co/nvidia/groupvit-gcc-yfcc) architecture.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
diff --git a/src/transformers/models/owlvit/configuration_owlvit.py b/src/transformers/models/owlvit/configuration_owlvit.py
index 1725c4dd84..5ab79c1bf0 100644
--- a/src/transformers/models/owlvit/configuration_owlvit.py
+++ b/src/transformers/models/owlvit/configuration_owlvit.py
@@ -253,7 +253,8 @@ class OwlViTConfig(PretrainedConfig):
     r"""
     [`OwlViTConfig`] is the configuration class to store the configuration of an [`OwlViTModel`]. It is used to
     instantiate an OWL-ViT model according to the specified arguments, defining the text model and vision model
-    configs.
+    configs. Instantiating a configuration with the defaults will yield a similar configuration to that of the OWL-ViT
+    [google/owlvit-base-patch32](https://huggingface.co/google/owlvit-base-patch32) architecture.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
diff --git a/src/transformers/models/x_clip/configuration_x_clip.py b/src/transformers/models/x_clip/configuration_x_clip.py
index 30f9214eb8..2e5297ecb1 100644
--- a/src/transformers/models/x_clip/configuration_x_clip.py
+++ b/src/transformers/models/x_clip/configuration_x_clip.py
@@ -272,6 +272,8 @@ class XCLIPConfig(PretrainedConfig):
     r"""
     [`XCLIPConfig`] is the configuration class to store the configuration of a [`XCLIPModel`]. It is used to
     instantiate X-CLIP model according to the specified arguments, defining the text model and vision model configs.
+    Instantiating a configuration with the defaults will yield a similar configuration to that of the X-CLIP
+    [microsoft/xclip-base-patch32](https://huggingface.co/microsoft/xclip-base-patch32) architecture.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
diff --git a/utils/check_config_docstrings.py b/utils/check_config_docstrings.py
index b0e5d1ced5..5fe39d7a90 100644
--- a/utils/check_config_docstrings.py
+++ b/utils/check_config_docstrings.py
@@ -40,16 +40,12 @@ _re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)")
 
 
 CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
-    "CLIPConfig",
-    "OwlViTConfig",
-    "GroupViTConfig",
     "DecisionTransformerConfig",
     "EncoderDecoderConfig",
     "RagConfig",
     "SpeechEncoderDecoderConfig",
     "VisionEncoderDecoderConfig",
     "VisionTextDualEncoderConfig",
-    "XCLIPConfig",
 }