From 803475fb69097e802985eb4f85b52199c66a52de Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 27 Oct 2022 09:26:10 +0200 Subject: [PATCH] Add checkpoint links in a few config classes (#19910) * For CLIP * Others * update Co-authored-by: ydshieh --- src/transformers/models/clip/configuration_clip.py | 4 +++- src/transformers/models/groupvit/configuration_groupvit.py | 3 ++- src/transformers/models/owlvit/configuration_owlvit.py | 3 ++- src/transformers/models/x_clip/configuration_x_clip.py | 2 ++ utils/check_config_docstrings.py | 4 ---- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/clip/configuration_clip.py b/src/transformers/models/clip/configuration_clip.py index e71e87fad6..7dad241212 100644 --- a/src/transformers/models/clip/configuration_clip.py +++ b/src/transformers/models/clip/configuration_clip.py @@ -254,7 +254,9 @@ class CLIPVisionConfig(PretrainedConfig): class CLIPConfig(PretrainedConfig): r""" [`CLIPConfig`] is the configuration class to store the configuration of a [`CLIPModel`]. It is used to instantiate - CLIP model according to the specified arguments, defining the text model and vision model configs. + CLIP model according to the specified arguments, defining the text model and vision model configs. Instantiating a + configuration with the defaults will yield a similar configuration to that of the CLIP + [openai/clip-vit-base-patch32](https://huggingface.co/openai/clip-vit-base-patch32) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/src/transformers/models/groupvit/configuration_groupvit.py b/src/transformers/models/groupvit/configuration_groupvit.py index efb37dbce8..7196f2250d 100644 --- a/src/transformers/models/groupvit/configuration_groupvit.py +++ b/src/transformers/models/groupvit/configuration_groupvit.py @@ -273,7 +273,8 @@ class GroupViTConfig(PretrainedConfig): r""" [`GroupViTConfig`] is the configuration class to store the configuration of a [`GroupViTModel`]. It is used to instantiate a GroupViT model according to the specified arguments, defining the text model and vision model - configs. + configs. Instantiating a configuration with the defaults will yield a similar configuration to that of the GroupViT + [nvidia/groupvit-gcc-yfcc](https://huggingface.co/nvidia/groupvit-gcc-yfcc) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/src/transformers/models/owlvit/configuration_owlvit.py b/src/transformers/models/owlvit/configuration_owlvit.py index 1725c4dd84..5ab79c1bf0 100644 --- a/src/transformers/models/owlvit/configuration_owlvit.py +++ b/src/transformers/models/owlvit/configuration_owlvit.py @@ -253,7 +253,8 @@ class OwlViTConfig(PretrainedConfig): r""" [`OwlViTConfig`] is the configuration class to store the configuration of an [`OwlViTModel`]. It is used to instantiate an OWL-ViT model according to the specified arguments, defining the text model and vision model - configs. + configs. Instantiating a configuration with the defaults will yield a similar configuration to that of the OWL-ViT + [google/owlvit-base-patch32](https://huggingface.co/google/owlvit-base-patch32) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/src/transformers/models/x_clip/configuration_x_clip.py b/src/transformers/models/x_clip/configuration_x_clip.py index 30f9214eb8..2e5297ecb1 100644 --- a/src/transformers/models/x_clip/configuration_x_clip.py +++ b/src/transformers/models/x_clip/configuration_x_clip.py @@ -272,6 +272,8 @@ class XCLIPConfig(PretrainedConfig): r""" [`XCLIPConfig`] is the configuration class to store the configuration of a [`XCLIPModel`]. It is used to instantiate X-CLIP model according to the specified arguments, defining the text model and vision model configs. + Instantiating a configuration with the defaults will yield a similar configuration to that of the X-CLIP + [microsoft/xclip-base-patch32](https://huggingface.co/microsoft/xclip-base-patch32) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/utils/check_config_docstrings.py b/utils/check_config_docstrings.py index b0e5d1ced5..5fe39d7a90 100644 --- a/utils/check_config_docstrings.py +++ b/utils/check_config_docstrings.py @@ -40,16 +40,12 @@ _re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)") CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = { - "CLIPConfig", - "OwlViTConfig", - "GroupViTConfig", "DecisionTransformerConfig", "EncoderDecoderConfig", "RagConfig", "SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig", - "XCLIPConfig", }