[docstring] Fix docstrings for CLIP (#26691)
fix docstrings for vanilla clip
This commit is contained in:
@@ -168,10 +168,14 @@ class CLIPVisionConfig(PretrainedConfig):
|
|||||||
Dimensionality of the encoder layers and the pooler layer.
|
Dimensionality of the encoder layers and the pooler layer.
|
||||||
intermediate_size (`int`, *optional*, defaults to 3072):
|
intermediate_size (`int`, *optional*, defaults to 3072):
|
||||||
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
|
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
|
||||||
|
projection_dim (`int`, *optional*, defaults to 512):
|
||||||
|
Dimensionality of text and vision projection layers.
|
||||||
num_hidden_layers (`int`, *optional*, defaults to 12):
|
num_hidden_layers (`int`, *optional*, defaults to 12):
|
||||||
Number of hidden layers in the Transformer encoder.
|
Number of hidden layers in the Transformer encoder.
|
||||||
num_attention_heads (`int`, *optional*, defaults to 12):
|
num_attention_heads (`int`, *optional*, defaults to 12):
|
||||||
Number of attention heads for each attention layer in the Transformer encoder.
|
Number of attention heads for each attention layer in the Transformer encoder.
|
||||||
|
num_channels (`int`, *optional*, defaults to 3):
|
||||||
|
The number of input channels.
|
||||||
image_size (`int`, *optional*, defaults to 224):
|
image_size (`int`, *optional*, defaults to 224):
|
||||||
The size (resolution) of each image.
|
The size (resolution) of each image.
|
||||||
patch_size (`int`, *optional*, defaults to 32):
|
patch_size (`int`, *optional*, defaults to 32):
|
||||||
@@ -179,13 +183,13 @@ class CLIPVisionConfig(PretrainedConfig):
|
|||||||
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
|
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
|
||||||
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
|
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
|
||||||
`"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported.
|
`"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported.
|
||||||
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
|
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
|
||||||
The epsilon used by the layer normalization layers.
|
The epsilon used by the layer normalization layers.
|
||||||
attention_dropout (`float`, *optional*, defaults to 0.0):
|
attention_dropout (`float`, *optional*, defaults to 0.0):
|
||||||
The dropout ratio for the attention probabilities.
|
The dropout ratio for the attention probabilities.
|
||||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||||
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||||
initializer_factor (`float`, *optional*, defaults to 1):
|
initializer_factor (`float`, *optional*, defaults to 1.0):
|
||||||
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
|
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
|
||||||
testing).
|
testing).
|
||||||
|
|
||||||
|
|||||||
@@ -284,13 +284,15 @@ class CLIPTokenizer(PreTrainedTokenizer):
|
|||||||
errors (`str`, *optional*, defaults to `"replace"`):
|
errors (`str`, *optional*, defaults to `"replace"`):
|
||||||
Paradigm to follow when decoding bytes to UTF-8. See
|
Paradigm to follow when decoding bytes to UTF-8. See
|
||||||
[bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
|
[bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
|
||||||
unk_token (`str`, *optional*, defaults to `<|endoftext|>`):
|
unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
||||||
token instead.
|
token instead.
|
||||||
bos_token (`str`, *optional*, defaults to `<|startoftext|>`):
|
bos_token (`str`, *optional*, defaults to `"<|startoftext|>"`):
|
||||||
The beginning of sequence token.
|
The beginning of sequence token.
|
||||||
eos_token (`str`, *optional*, defaults to `<|endoftext|>`):
|
eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
The end of sequence token.
|
The end of sequence token.
|
||||||
|
pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
|
The token used for padding, for example when batching sequences of different lengths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
|||||||
@@ -56,17 +56,21 @@ class CLIPTokenizerFast(PreTrainedTokenizerFast):
|
|||||||
refer to this superclass for more information regarding those methods.
|
refer to this superclass for more information regarding those methods.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vocab_file (`str`):
|
vocab_file (`str`, *optional*):
|
||||||
Path to the vocabulary file.
|
Path to the vocabulary file.
|
||||||
merges_file (`str`):
|
merges_file (`str`, *optional*):
|
||||||
Path to the merges file.
|
Path to the merges file.
|
||||||
unk_token (`str`, *optional*, defaults to `<|endoftext|>`):
|
tokenizer_file (`str`, *optional*):
|
||||||
|
The path to a tokenizer file to use instead of the vocab file.
|
||||||
|
unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
||||||
token instead.
|
token instead.
|
||||||
bos_token (`str`, *optional*, defaults to `<|startoftext|>`):
|
bos_token (`str`, *optional*, defaults to `"<|startoftext|>"`):
|
||||||
The beginning of sequence token.
|
The beginning of sequence token.
|
||||||
eos_token (`str`, *optional*, defaults to `<|endoftext|>`):
|
eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
The end of sequence token.
|
The end of sequence token.
|
||||||
|
pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
|
||||||
|
The token used for padding, for example when batching sequences of different lengths.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
|||||||
@@ -118,9 +118,6 @@ OBJECTS_TO_IGNORE = [
|
|||||||
"BridgeTowerTextConfig",
|
"BridgeTowerTextConfig",
|
||||||
"BridgeTowerVisionConfig",
|
"BridgeTowerVisionConfig",
|
||||||
"BrosModel",
|
"BrosModel",
|
||||||
"CLIPTokenizer",
|
|
||||||
"CLIPTokenizerFast",
|
|
||||||
"CLIPVisionConfig",
|
|
||||||
"CamembertConfig",
|
"CamembertConfig",
|
||||||
"CamembertModel",
|
"CamembertModel",
|
||||||
"CamembertTokenizerFast",
|
"CamembertTokenizerFast",
|
||||||
|
|||||||
Reference in New Issue
Block a user