Load sub-configs from composite configs (#34410)

* save/load sub-configs

* nit forgot these

* fix copies

* move test to common

* use dict for sub-configs

* add load-save-laod test

* clean up modeling check

* oops this are correct keys

* fix some tests, missed some composite configs

* this model was missed
This commit is contained in:
Raushan Turganbay
2024-11-05 11:34:01 +01:00
committed by GitHub
parent 5e1fd4e204
commit 893ad04fad
78 changed files with 464 additions and 1052 deletions

View File

@@ -190,6 +190,8 @@ class PretrainedConfig(PushToHubMixin):
"""
model_type: str = ""
base_config_key: str = ""
sub_configs: Dict[str, "PretrainedConfig"] = {}
is_composition: bool = False
attribute_map: Dict[str, str] = {}
_auto_class: Optional[str] = None
@@ -543,11 +545,22 @@ class PretrainedConfig(PushToHubMixin):
cls._set_token_in_kwargs(kwargs, token)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if cls.base_config_key and cls.base_config_key in config_dict:
config_dict = config_dict[cls.base_config_key]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
# sometimes the config has no `base_config_key` if the config is used in several composite models
# e.g. LlamaConfig. In that case we try to see if there is match in `model_type` before raising a warning
for k, v in config_dict.items():
if isinstance(v, dict) and v.get("model_type") == cls.model_type:
config_dict = v
# raise warning only if we still can't see a match in `model_type`
if config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)

View File

@@ -1608,15 +1608,14 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
# Below we check if a config is composite and manually prepare a dict of attn impl if not already passed as a dict.
# Later each sub-module will dispatch with its own attn impl, by calling `XXXModel._from_config(config.text_config)`
# If any of sub-modules doesn't support requested attn, an error will be raised. See https://github.com/huggingface/transformers/pull/32238
for key in config:
if isinstance(getattr(config, key), PretrainedConfig):
sub_config = getattr(config, key)
curr_attn_implementation = (
requested_attn_implementation
if not isinstance(requested_attn_implementation, dict)
else requested_attn_implementation.get(key, None)
)
sub_config._attn_implementation_internal = curr_attn_implementation
for key in config.sub_configs.keys():
sub_config = getattr(config, key)
curr_attn_implementation = (
requested_attn_implementation
if not isinstance(requested_attn_implementation, dict)
else requested_attn_implementation.get(key, None)
)
sub_config._attn_implementation_internal = curr_attn_implementation
if use_flash_attention_2:
logger.warning_once(

View File

@@ -14,8 +14,7 @@
# limitations under the License.
"""ALIGN model configuration"""
import os
from typing import TYPE_CHECKING, List, Union
from typing import TYPE_CHECKING, List
if TYPE_CHECKING:
@@ -95,6 +94,7 @@ class AlignTextConfig(PretrainedConfig):
```"""
model_type = "align_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -133,24 +133,6 @@ class AlignTextConfig(PretrainedConfig):
self.use_cache = use_cache
self.pad_token_id = pad_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from AlignConfig
if config_dict.get("model_type") == "align":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AlignVisionConfig(PretrainedConfig):
r"""
@@ -223,6 +205,7 @@ class AlignVisionConfig(PretrainedConfig):
```"""
model_type = "align_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -272,24 +255,6 @@ class AlignVisionConfig(PretrainedConfig):
self.drop_connect_rate = drop_connect_rate
self.num_hidden_layers = sum(num_block_repeats) * 4
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from AlignConfig
if config_dict.get("model_type") == "align":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AlignConfig(PretrainedConfig):
r"""
@@ -340,6 +305,7 @@ class AlignConfig(PretrainedConfig):
```"""
model_type = "align"
sub_configs = {"text_config": AlignTextConfig, "vision_config": AlignVisionConfig}
def __init__(
self,

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""AltCLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -199,6 +196,7 @@ class AltCLIPVisionConfig(PretrainedConfig):
```"""
model_type = "altclip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -233,24 +231,6 @@ class AltCLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from AltCLIPConfig
if config_dict.get("model_type") == "altclip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AltCLIPConfig(PretrainedConfig):
r"""
@@ -298,6 +278,7 @@ class AltCLIPConfig(PretrainedConfig):
```"""
model_type = "altclip"
sub_configs = {"text_config": AltCLIPTextConfig, "vision_config": AltCLIPVisionConfig}
def __init__(
self, text_config=None, vision_config=None, projection_dim=768, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,12 +14,11 @@
# limitations under the License.
"""BARK model configuration"""
import os
from typing import Dict, Optional, Union
from typing import Dict
from ...configuration_utils import PretrainedConfig
from ...utils import add_start_docstrings, logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -64,7 +63,6 @@ BARK_SUBMODELCONFIG_START_DOCSTRING = """
class BarkSubModelConfig(PretrainedConfig):
model_type = "bark_module"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {
@@ -101,38 +99,6 @@ class BarkSubModelConfig(PretrainedConfig):
super().__init__(**kwargs)
@classmethod
def from_pretrained(
cls,
pretrained_model_name_or_path: Union[str, os.PathLike],
cache_dir: Optional[Union[str, os.PathLike]] = None,
force_download: bool = False,
local_files_only: bool = False,
token: Optional[Union[str, bool]] = None,
revision: str = "main",
**kwargs,
) -> "PretrainedConfig":
kwargs["cache_dir"] = cache_dir
kwargs["force_download"] = force_download
kwargs["local_files_only"] = local_files_only
kwargs["revision"] = revision
cls._set_token_in_kwargs(kwargs, token)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the config dict if we are loading from Bark
if config_dict.get("model_type") == "bark":
config_dict = config_dict[f"{cls.model_type}_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@add_start_docstrings(
BARK_SUBMODELCONFIG_START_DOCSTRING.format(config="BarkSemanticConfig", model="BarkSemanticModel"),
@@ -154,6 +120,7 @@ class BarkSubModelConfig(PretrainedConfig):
)
class BarkSemanticConfig(BarkSubModelConfig):
model_type = "semantic"
base_config_key = "semantic_config"
@add_start_docstrings(
@@ -176,6 +143,7 @@ class BarkSemanticConfig(BarkSubModelConfig):
)
class BarkCoarseConfig(BarkSubModelConfig):
model_type = "coarse_acoustics"
base_config_key = "coarse_acoustics_config"
@add_start_docstrings(
@@ -203,6 +171,7 @@ class BarkCoarseConfig(BarkSubModelConfig):
)
class BarkFineConfig(BarkSubModelConfig):
model_type = "fine_acoustics"
base_config_key = "fine_acoustics_config"
def __init__(self, tie_word_embeddings=True, n_codes_total=8, n_codes_given=1, **kwargs):
self.n_codes_total = n_codes_total
@@ -265,6 +234,12 @@ class BarkConfig(PretrainedConfig):
"""
model_type = "bark"
sub_configs = {
"semantic_config": BarkSemanticConfig,
"coarse_acoustics_config": BarkCoarseConfig,
"fine_acoustics_config": BarkFineConfig,
"codec_config": AutoConfig,
}
def __init__(
self,

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""Blip model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -96,6 +93,7 @@ class BlipTextConfig(PretrainedConfig):
```"""
model_type = "blip_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -146,24 +144,6 @@ class BlipTextConfig(PretrainedConfig):
self.use_cache = use_cache
self.label_smoothing = label_smoothing
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from BlipConfig
if config_dict.get("model_type") == "blip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BlipVisionConfig(PretrainedConfig):
r"""
@@ -215,6 +195,7 @@ class BlipVisionConfig(PretrainedConfig):
```"""
model_type = "blip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -245,24 +226,6 @@ class BlipVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from BlipConfig
if config_dict.get("model_type") == "blip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BlipConfig(PretrainedConfig):
r"""
@@ -316,6 +279,7 @@ class BlipConfig(PretrainedConfig):
```"""
model_type = "blip"
sub_configs = {"text_config": BlipTextConfig, "vision_config": BlipVisionConfig}
def __init__(
self,

View File

@@ -14,13 +14,12 @@
# limitations under the License.
"""BLIP-2 model configuration"""
import os
from typing import Optional, Union
from typing import Optional
from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -76,6 +75,7 @@ class Blip2VisionConfig(PretrainedConfig):
```"""
model_type = "blip_2_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -106,24 +106,6 @@ class Blip2VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Blip2Config
if config_dict.get("model_type") == "blip-2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Blip2QFormerConfig(PretrainedConfig):
r"""
@@ -190,6 +172,7 @@ class Blip2QFormerConfig(PretrainedConfig):
```"""
model_type = "blip_2_qformer"
base_config_key = "qformer_config"
def __init__(
self,
@@ -229,24 +212,6 @@ class Blip2QFormerConfig(PretrainedConfig):
self.encoder_hidden_size = encoder_hidden_size
self.use_qformer_text_input = use_qformer_text_input
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from Blip2Config
if config_dict.get("model_type") == "blip-2":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Blip2Config(PretrainedConfig):
r"""
@@ -306,6 +271,7 @@ class Blip2Config(PretrainedConfig):
```"""
model_type = "blip-2"
sub_configs = {"text_config": AutoConfig, "qformer_config": Blip2QFormerConfig, "vision_config": Blip2VisionConfig}
def __init__(
self,

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""BridgeTower model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -68,6 +65,7 @@ class BridgeTowerVisionConfig(PretrainedConfig):
```"""
model_type = "bridgetower_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -95,21 +93,6 @@ class BridgeTowerVisionConfig(PretrainedConfig):
self.share_layernorm = share_layernorm
self.remove_last_layer = remove_last_layer
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "bridgetower":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BridgeTowerTextConfig(PretrainedConfig):
r"""
@@ -175,6 +158,7 @@ class BridgeTowerTextConfig(PretrainedConfig):
```"""
model_type = "bridgetower_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -217,21 +201,6 @@ class BridgeTowerTextConfig(PretrainedConfig):
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "bridgetower":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BridgeTowerConfig(PretrainedConfig):
r"""
@@ -288,6 +257,7 @@ class BridgeTowerConfig(PretrainedConfig):
```"""
model_type = "bridgetower"
sub_configs = {"text_config": BridgeTowerTextConfig, "vision_config": BridgeTowerVisionConfig}
def __init__(
self,

View File

@@ -62,6 +62,7 @@ class ChameleonVQVAEConfig(PretrainedConfig):
"""
model_type = "chameleon_vqgan"
base_config_key = "vq_config"
def __init__(
self,
@@ -187,6 +188,7 @@ class ChameleonConfig(PretrainedConfig):
```"""
model_type = "chameleon"
sub_configs = {"vq_config": ChameleonVQVAEConfig}
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(

View File

@@ -14,9 +14,8 @@
# limitations under the License.
"""Chinese-CLIP model configuration"""
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING:
@@ -102,6 +101,7 @@ class ChineseCLIPTextConfig(PretrainedConfig):
```"""
model_type = "chinese_clip_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -141,24 +141,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
self.position_embedding_type = position_embedding_type
self.use_cache = use_cache
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from ChineseCLIPConfig
if config_dict.get("model_type") == "chinese_clip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ChineseCLIPVisionConfig(PretrainedConfig):
r"""
@@ -215,6 +197,7 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
```"""
model_type = "chinese_clip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -249,24 +232,6 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from ChineseCLIPConfig
if config_dict.get("model_type") == "chinese_clip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ChineseCLIPConfig(PretrainedConfig):
r"""
@@ -316,6 +281,7 @@ class ChineseCLIPConfig(PretrainedConfig):
```"""
model_type = "chinese_clip"
sub_configs = {"text_config": ChineseCLIPTextConfig, "vision_config": ChineseCLIPVisionConfig}
def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""CLAP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -94,6 +91,7 @@ class ClapTextConfig(PretrainedConfig):
```"""
model_type = "clap_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -137,24 +135,6 @@ class ClapTextConfig(PretrainedConfig):
self.projection_hidden_act = projection_hidden_act
self.projection_dim = projection_dim
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from ClapConfig
if config_dict.get("model_type") == "clap":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClapAudioConfig(PretrainedConfig):
r"""
@@ -245,6 +225,7 @@ class ClapAudioConfig(PretrainedConfig):
```"""
model_type = "clap_audio_model"
base_config_key = "audio_config"
def __init__(
self,
@@ -307,24 +288,6 @@ class ClapAudioConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.projection_hidden_act = projection_hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the audio config dict if we are loading from ClapConfig
if config_dict.get("model_type") == "clap":
config_dict = config_dict["audio_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClapConfig(PretrainedConfig):
r"""
@@ -377,6 +340,7 @@ class ClapConfig(PretrainedConfig):
```"""
model_type = "clap"
sub_configs = {"text_config": ClapTextConfig, "audio_config": ClapAudioConfig}
def __init__(
self,

View File

@@ -14,9 +14,8 @@
# limitations under the License.
"""CLIP model configuration"""
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING:
@@ -93,6 +92,7 @@ class CLIPTextConfig(PretrainedConfig):
```"""
model_type = "clip_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -130,24 +130,6 @@ class CLIPTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from CLIPConfig
if config_dict.get("model_type") == "clip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPVisionConfig(PretrainedConfig):
r"""
@@ -205,6 +187,7 @@ class CLIPVisionConfig(PretrainedConfig):
```"""
model_type = "clip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -239,24 +222,6 @@ class CLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from CLIPConfig
if config_dict.get("model_type") == "clip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPConfig(PretrainedConfig):
r"""
@@ -305,6 +270,7 @@ class CLIPConfig(PretrainedConfig):
```"""
model_type = "clip"
sub_configs = {"text_config": CLIPTextConfig, "vision_config": CLIPVisionConfig}
def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""CLIPSeg model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -84,6 +81,7 @@ class CLIPSegTextConfig(PretrainedConfig):
```"""
model_type = "clipseg_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -117,24 +115,6 @@ class CLIPSegTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from CLIPSegConfig
if config_dict.get("model_type") == "clipseg":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPSegVisionConfig(PretrainedConfig):
r"""
@@ -190,6 +170,7 @@ class CLIPSegVisionConfig(PretrainedConfig):
```"""
model_type = "clipseg_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -222,24 +203,6 @@ class CLIPSegVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from CLIPSegConfig
if config_dict.get("model_type") == "clipseg":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPSegConfig(PretrainedConfig):
r"""
@@ -306,6 +269,7 @@ class CLIPSegConfig(PretrainedConfig):
```"""
model_type = "clipseg"
sub_configs = {"text_config": CLIPSegTextConfig, "vision_config": CLIPSegVisionConfig}
def __init__(
self,

View File

@@ -91,6 +91,7 @@ class ClvpEncoderConfig(PretrainedConfig):
```"""
model_type = "clvp_encoder"
base_config_key = ["text_config", "speech_config"]
def __init__(
self,
@@ -141,7 +142,7 @@ class ClvpEncoderConfig(PretrainedConfig):
# make sure to have the config_type be either "text_config" or "speech_config"
# this is to make sure that we can load only text or speech configs from the nested ClvpConfig.
if config_type not in ["text_config", "speech_config"]:
if config_type not in cls.base_config_key:
raise ValueError(
f"We can only load either 'text_config' or 'speech_config' but you are trying to load" f"{config_type}"
)
@@ -253,6 +254,7 @@ class ClvpDecoderConfig(PretrainedConfig):
```"""
model_type = "clvp_decoder"
base_config_key = "decoder_config"
def __init__(
self,
@@ -314,24 +316,6 @@ class ClvpDecoderConfig(PretrainedConfig):
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the speech config dict if we are loading from ClvpConfig
if config_dict.get("model_type") == "clvp":
config_dict = config_dict["decoder_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClvpConfig(PretrainedConfig):
r"""
@@ -386,7 +370,11 @@ class ClvpConfig(PretrainedConfig):
```"""
model_type = "clvp"
is_composition = True
sub_configs = {
"text_config": ClvpEncoderConfig,
"speech_config": ClvpEncoderConfig,
"decoder_config": ClvpDecoderConfig,
}
def __init__(
self,

View File

@@ -41,6 +41,8 @@ class DbrxAttentionConfig(PretrainedConfig):
rope_theta (`float`, *optional*, defaults to 10000.0): The base frequency for rope.
"""
base_config_key = "attn_config"
def __init__(
self,
attn_pdrop: float = 0.0,
@@ -55,29 +57,12 @@ class DbrxAttentionConfig(PretrainedConfig):
self.kv_n_heads = kv_n_heads
self.rope_theta = rope_theta
for k in ["model_type"]:
for k in ["model_type", "attn_implementation", "transformers_version", "_commit_hash"]:
if k in kwargs:
kwargs.pop(k)
if len(kwargs) != 0:
raise ValueError(f"Found unknown {kwargs=}")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs: Any) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "dbrx":
config_dict = config_dict["attn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class DbrxFFNConfig(PretrainedConfig):
"""Configuration class for Dbrx FFN.
@@ -100,6 +85,8 @@ class DbrxFFNConfig(PretrainedConfig):
moe_normalize_expert_weights (`float`, *optional*, defaults to 1.0): The normalization factor for the expert weights.
"""
base_config_key = "ffn_config"
def __init__(
self,
ffn_act_fn: dict = None,
@@ -122,29 +109,12 @@ class DbrxFFNConfig(PretrainedConfig):
self.moe_loss_weight = moe_loss_weight
self.moe_normalize_expert_weights = moe_normalize_expert_weights
for k in ["model_type"]:
for k in ["model_type", "attn_implementation", "transformers_version", "_commit_hash"]:
if k in kwargs:
kwargs.pop(k)
if len(kwargs) != 0:
raise ValueError(f"Found unknown {kwargs=}")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs: Any) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "dbrx":
config_dict = config_dict["ffn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class DbrxConfig(PretrainedConfig):
r"""
@@ -202,6 +172,7 @@ class DbrxConfig(PretrainedConfig):
"""
model_type = "dbrx"
sub_configs = {"attn_config": DbrxAttentionConfig, "ffn_config": DbrxFFNConfig}
attribute_map = {
"num_attention_heads": "n_heads",
"hidden_size": "d_model",

View File

@@ -17,6 +17,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import AutoConfig
logger = logging.get_logger(__name__)
@@ -70,6 +71,7 @@ class EncoderDecoderConfig(PretrainedConfig):
```"""
model_type = "encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True
def __init__(self, **kwargs):
@@ -84,8 +86,6 @@ class EncoderDecoderConfig(PretrainedConfig):
decoder_config = kwargs.pop("decoder")
decoder_model_type = decoder_config.pop("model_type")
from ..auto.configuration_auto import AutoConfig
self.encoder = AutoConfig.for_model(encoder_model_type, **encoder_config)
self.decoder = AutoConfig.for_model(decoder_model_type, **decoder_config)
self.is_encoder_decoder = True

View File

@@ -164,6 +164,7 @@ class FastSpeech2ConformerConfig(PretrainedConfig):
```"""
model_type = "fastspeech2_conformer"
base_config_key = "model_config"
attribute_map = {"num_hidden_layers": "encoder_layers", "num_attention_heads": "encoder_num_attention_heads"}
def __init__(
@@ -377,6 +378,7 @@ class FastSpeech2ConformerHifiGanConfig(PretrainedConfig):
```"""
model_type = "hifigan"
base_config_key = "vocoder_config"
def __init__(
self,
@@ -453,7 +455,7 @@ class FastSpeech2ConformerWithHifiGanConfig(PretrainedConfig):
"""
model_type = "fastspeech2_conformer_with_hifigan"
is_composition = True
sub_configs = {"model_config": FastSpeech2ConformerConfig, "vocoder_config": FastSpeech2ConformerHifiGanConfig}
def __init__(
self,

View File

@@ -14,8 +14,7 @@
# limitations under the License.
"""FLAVA model configurations"""
import os
from typing import Any, Dict, Union
from typing import Any, Dict
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -86,6 +85,7 @@ class FlavaImageConfig(PretrainedConfig):
```"""
model_type = "flava_image_model"
base_config_key = "image_config"
def __init__(
self,
@@ -124,24 +124,6 @@ class FlavaImageConfig(PretrainedConfig):
self.mask_token = mask_token
self.vocab_size = vocab_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the image config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["image_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaTextConfig(PretrainedConfig):
r"""
@@ -216,6 +198,7 @@ class FlavaTextConfig(PretrainedConfig):
```"""
model_type = "flava_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -254,24 +237,6 @@ class FlavaTextConfig(PretrainedConfig):
self.qkv_bias = qkv_bias
self.pad_token_id = pad_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaMultimodalConfig(PretrainedConfig):
r"""
@@ -327,6 +292,7 @@ class FlavaMultimodalConfig(PretrainedConfig):
```"""
model_type = "flava_multimodal_model"
base_config_key = "multimodal_config"
def __init__(
self,
@@ -357,27 +323,10 @@ class FlavaMultimodalConfig(PretrainedConfig):
self.qkv_bias = qkv_bias
self.use_cls_token = use_cls_token
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the multimodal config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["multimodal_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaImageCodebookConfig(PretrainedConfig):
model_type = "flava_image_codebook"
base_config_key = "image_codebook_config"
r"""
[`FlavaImageCodebookConfig`] is the configuration class to store the configuration of a [`FlavaImageCodebook`]. It
@@ -442,24 +391,6 @@ class FlavaImageCodebookConfig(PretrainedConfig):
self.freeze = freeze
self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the image codebook config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["image_codebook_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaConfig(PretrainedConfig):
r"""
@@ -532,6 +463,12 @@ class FlavaConfig(PretrainedConfig):
"""
model_type = "flava"
sub_configs = {
"text_config": FlavaTextConfig,
"image_config": FlavaImageConfig,
"multimodal_config": FlavaMultimodalConfig,
"image_codebook_config": FlavaImageCodebookConfig,
}
def __init__(
self,

View File

@@ -13,8 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -72,6 +70,7 @@ class GitVisionConfig(PretrainedConfig):
```"""
model_type = "git_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -102,24 +101,6 @@ class GitVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from GITConfig
if config_dict.get("model_type") == "git":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GitConfig(PretrainedConfig):
r"""
@@ -186,6 +167,7 @@ class GitConfig(PretrainedConfig):
```"""
model_type = "git"
sub_configs = {"vision_config": GitVisionConfig}
def __init__(
self,

View File

@@ -14,9 +14,8 @@
# limitations under the License.
"""GroupViT model configuration"""
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
from typing import TYPE_CHECKING, Any, Mapping, Optional
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig
@@ -86,6 +85,7 @@ class GroupViTTextConfig(PretrainedConfig):
```"""
model_type = "groupvit_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -121,24 +121,6 @@ class GroupViTTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from GroupViTConfig
if config_dict.get("model_type") == "groupvit":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GroupViTVisionConfig(PretrainedConfig):
r"""
@@ -197,6 +179,7 @@ class GroupViTVisionConfig(PretrainedConfig):
```"""
model_type = "groupvit_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -246,24 +229,6 @@ class GroupViTVisionConfig(PretrainedConfig):
self.assign_eps = assign_eps
self.assign_mlp_ratio = assign_mlp_ratio
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from GroupViTConfig
if config_dict.get("model_type") == "groupvit":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GroupViTConfig(PretrainedConfig):
r"""
@@ -292,6 +257,7 @@ class GroupViTConfig(PretrainedConfig):
"""
model_type = "groupvit"
sub_configs = {"text_config": GroupViTTextConfig, "vision_config": GroupViTVisionConfig}
def __init__(
self,

View File

@@ -38,7 +38,7 @@ class IdeficsVisionConfig(PretrainedConfig):
documentation from [`PretrainedConfig`] for more information.
Args:
hidden_size (`int`, *optional*, defaults to 768):
embed_dim (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer. (elsewhere referred to as `hidden_size`)
image_size (`int`, *optional*, defaults to 224):
The size (resolution) of each image.
@@ -50,12 +50,12 @@ class IdeficsVisionConfig(PretrainedConfig):
Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 16):
Number of attention heads for each attention layer in the Transformer encoder.
image_num_channels (`int`, *optional*, defaults to `3`):
num_channels (`int`, *optional*, defaults to 3):
Number of image channels.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
@@ -64,11 +64,9 @@ class IdeficsVisionConfig(PretrainedConfig):
initializer_factor (`float`, *optional*, defaults to 1.0):
A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization
testing).
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
"""
model_type = "idefics"
model_type = "idefics_vision"
attribute_map = {
"hidden_size": "embed_dim",
}
@@ -119,7 +117,7 @@ class IdeficsPerceiverConfig(PretrainedConfig):
Args:
use_resampler (`bool`, *optional*, defaults to `False`):
Whether or not to use the resampler
resampler_n_latents (`int`, *optional*, defaults to ):
resampler_n_latents (`int`, *optional*, defaults to 64):
Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
resampler_depth (`int`, *optional*, defaults to 6):
Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3).
@@ -131,7 +129,7 @@ class IdeficsPerceiverConfig(PretrainedConfig):
Whether or not to use qk layer norms in perceiver
"""
model_type = "idefics"
model_type = "idefics_perciever"
def __init__(
self,
@@ -235,7 +233,7 @@ class IdeficsConfig(PretrainedConfig):
```"""
model_type = "idefics"
is_composition = False
sub_configs = {"perceiver_config": IdeficsPerceiverConfig, "vision_config": IdeficsVisionConfig}
def __init__(
self,

View File

@@ -13,12 +13,9 @@
# limitations under the License.
"""Idefics2 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -76,7 +73,8 @@ class Idefics2VisionConfig(PretrainedConfig):
>>> configuration = model.config
```"""
model_type = "idefics2"
model_type = "idefics2_vision"
base_config_key = "vision_config"
def __init__(
self,
@@ -107,24 +105,6 @@ class Idefics2VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Idefics2Config
if config_dict.get("model_type") == "idefics2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Idefics2PerceiverConfig(PretrainedConfig):
r"""
@@ -152,7 +132,7 @@ class Idefics2PerceiverConfig(PretrainedConfig):
The dropout ratio for the attention probabilities.
"""
model_type = "idefics2"
model_type = "idefics2_perceiver"
def __init__(
self,
@@ -220,7 +200,11 @@ class Idefics2Config(PretrainedConfig):
```"""
model_type = "idefics2"
is_composition = True
sub_configs = {
"text_config": AutoConfig,
"perceiver_config": Idefics2PerceiverConfig,
"vision_config": Idefics2VisionConfig,
}
def __init__(
self,

View File

@@ -13,12 +13,9 @@
# limitations under the License.
"""Idefics3 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -57,8 +54,7 @@ class Idefics3VisionConfig(PretrainedConfig):
The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
intializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation for initializing all weight matrices in the model.
initializer_range (`<fill_type>`, *optional*, defaults to 0.02): <fill_docstring>
Example:
@@ -76,7 +72,8 @@ class Idefics3VisionConfig(PretrainedConfig):
>>> configuration = model.config
```"""
model_type = "idefics3"
model_type = "idefics3_vision"
base_config_key = "vision_config"
def __init__(
self,
@@ -107,24 +104,6 @@ class Idefics3VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Idefics3Config
if config_dict.get("model_type") == "idefics3":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Idefics3Config(PretrainedConfig):
r"""
@@ -165,7 +144,7 @@ class Idefics3Config(PretrainedConfig):
```"""
model_type = "idefics3"
is_composition = True
sub_configs = {"text_config": AutoConfig, "vision_config": Idefics3VisionConfig}
def __init__(
self,
@@ -204,4 +183,4 @@ class Idefics3Config(PretrainedConfig):
self.text_config = text_config
self.scale_factor = scale_factor
super().__init__(**kwargs, tie_word_embeddings=tie_word_embeddings)
super().__init__(**kwargs, pad_token_id=pad_token_id, tie_word_embeddings=tie_word_embeddings)

View File

@@ -14,13 +14,10 @@
# limitations under the License.
"""InstructBLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -78,6 +75,7 @@ class InstructBlipVisionConfig(PretrainedConfig):
```"""
model_type = "instructblip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -108,24 +106,6 @@ class InstructBlipVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from InstructBlipConfig
if config_dict.get("model_type") == "instructblip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipQFormerConfig(PretrainedConfig):
r"""
@@ -192,6 +172,7 @@ class InstructBlipQFormerConfig(PretrainedConfig):
```"""
model_type = "instructblip_qformer"
base_config_key = "qformer_config"
def __init__(
self,
@@ -229,24 +210,6 @@ class InstructBlipQFormerConfig(PretrainedConfig):
self.cross_attention_frequency = cross_attention_frequency
self.encoder_hidden_size = encoder_hidden_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from InstructBlipConfig
if config_dict.get("model_type") == "instructblip":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipConfig(PretrainedConfig):
r"""
@@ -305,6 +268,11 @@ class InstructBlipConfig(PretrainedConfig):
```"""
model_type = "instructblip"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipQFormerConfig,
"vision_config": InstructBlipVisionConfig,
}
def __init__(
self,

View File

@@ -19,13 +19,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -83,6 +81,7 @@ class InstructBlipVideoVisionConfig(PretrainedConfig):
```"""
model_type = "instructblipvideo_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -113,24 +112,6 @@ class InstructBlipVideoVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from InstructBlipVideoConfig
if config_dict.get("model_type") == "instructblipvideo":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipVideoQFormerConfig(PretrainedConfig):
r"""
@@ -197,6 +178,7 @@ class InstructBlipVideoQFormerConfig(PretrainedConfig):
```"""
model_type = "instructblipvideo_qformer"
base_config_key = "qformer_config"
def __init__(
self,
@@ -234,24 +216,6 @@ class InstructBlipVideoQFormerConfig(PretrainedConfig):
self.cross_attention_frequency = cross_attention_frequency
self.encoder_hidden_size = encoder_hidden_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from InstructBlipVideoConfig
if config_dict.get("model_type") == "instructblipvideo":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipVideoConfig(PretrainedConfig):
r"""
@@ -310,6 +274,11 @@ class InstructBlipVideoConfig(PretrainedConfig):
```"""
model_type = "instructblipvideo"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipVideoQFormerConfig,
"vision_config": InstructBlipVideoVisionConfig,
}
def __init__(
self,

View File

@@ -32,7 +32,7 @@ from transformers.models.instructblip.modeling_instructblip import (
from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -103,6 +103,11 @@ class InstructBlipVideoConfig(PretrainedConfig):
```"""
model_type = "instructblipvideo"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipVideoQFormerConfig,
"vision_config": InstructBlipVideoVisionConfig,
}
def __init__(
self,

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""KOSMOS-2 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -61,7 +58,7 @@ class Kosmos2TextConfig(PretrainedConfig):
layerdrop (`float`, *optional*, defaults to 0.0):
The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
for more details.
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers.
init_std (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
@@ -69,9 +66,16 @@ class Kosmos2TextConfig(PretrainedConfig):
Scale embeddings by diving by sqrt(embed_dim).
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models).
pad_token_id (`int`, *optional*, defaults to 1):
Token id used for padding.
bos_token_id (`int`, *optional*, defaults to 0):
Token id used for beginning of string.
eos_token_id (`int`, *optional*, defaults to 2):
Token id used for end of string.
```"""
model_type = "kosmos_2_text_model"
base_config_key = "text_config"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {
"num_attention_heads": "attention_heads",
@@ -124,24 +128,6 @@ class Kosmos2TextConfig(PretrainedConfig):
self.scale_embedding = scale_embedding
self.use_cache = use_cache
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from Kosmos2Config
if config_dict.get("model_type") == "kosmos-2":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Kosmos2VisionConfig(PretrainedConfig):
r"""
@@ -171,18 +157,19 @@ class Kosmos2VisionConfig(PretrainedConfig):
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-5):
layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
initializer_factor (`float`, *optional*, defaults to 1):
initializer_factor (`float`, *optional*, defaults to 1.0):
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
testing).
```"""
model_type = "kosmos_2_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -215,24 +202,6 @@ class Kosmos2VisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Kosmos2Config
if config_dict.get("model_type") == "kosmos-2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Kosmos2Config(PretrainedConfig):
r"""
@@ -267,7 +236,7 @@ class Kosmos2Config(PretrainedConfig):
```"""
model_type = "kosmos-2"
is_composition = True
sub_configs = {"text_config": Kosmos2TextConfig, "vision_config": Kosmos2VisionConfig}
def __init__(
self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -73,7 +73,7 @@ class LlavaConfig(PretrainedConfig):
```"""
model_type = "llava"
is_composition = True
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -78,7 +78,7 @@ class LlavaNextConfig(PretrainedConfig):
```"""
model_type = "llava_next"
is_composition = False
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -21,7 +21,7 @@
from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
class LlavaNextVideoConfig(PretrainedConfig):
@@ -86,7 +86,7 @@ class LlavaNextVideoConfig(PretrainedConfig):
```"""
model_type = "llava_next_video"
is_composition = True
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -31,7 +31,7 @@ from ...configuration_utils import PretrainedConfig
from ...utils import (
logging,
)
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -99,7 +99,7 @@ class LlavaNextVideoConfig(PretrainedConfig):
```"""
model_type = "llava_next_video"
is_composition = True
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -18,7 +18,7 @@ from ...configuration_utils import PretrainedConfig
from ...utils import (
logging,
)
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -81,7 +81,7 @@ class LlavaOnevisionConfig(PretrainedConfig):
```"""
model_type = "llava_onevision"
is_composition = False
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -13,8 +13,7 @@
# limitations under the License.
"""Mllama model configuration"""
import os
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional
from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation
@@ -59,7 +58,7 @@ class MllamaVisionConfig(PretrainedConfig):
The size (resolution) of each image *tile*.
patch_size (`int`, *optional*, defaults to 14):
The size (resolution) of each patch.
norm_eps (`float`, *optional*, defaults to 1e-5):
norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers.
max_num_tiles (`int`, *optional*, defaults to 4):
Maximum number of tiles for image splitting.
@@ -88,6 +87,7 @@ class MllamaVisionConfig(PretrainedConfig):
```"""
model_type = "mllama_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -137,23 +137,6 @@ class MllamaVisionConfig(PretrainedConfig):
def max_aspect_ratio_id(self) -> int:
return len(self.supported_aspect_ratios)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mllama":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MllamaTextConfig(PretrainedConfig):
r"""
@@ -178,12 +161,12 @@ class MllamaTextConfig(PretrainedConfig):
Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 32):
Number of attention heads for each attention layer in the Transformer encoder.
num_key_value_heads (`int`, *optional*):
num_key_value_heads (`int`, *optional*, defaults to 8):
This is the number of key_value heads that should be used to implement Grouped Query Attention. If not
specified, will default to `num_attention_heads`.
intermediate_size (`int`, *optional*, defaults to 14336):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
rope_theta (`float`, *optional*, defaults to 500000.0):
rope_theta (`float`, *optional*, defaults to `500000.0`):
The base period of the RoPE embeddings.
rope_scaling (`Dict`, *optional*):
Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
@@ -259,6 +242,7 @@ class MllamaTextConfig(PretrainedConfig):
```"""
model_type = "mllama_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -311,23 +295,6 @@ class MllamaTextConfig(PretrainedConfig):
**kwargs,
)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mllama":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MllamaConfig(PretrainedConfig):
r"""
@@ -370,7 +337,7 @@ class MllamaConfig(PretrainedConfig):
```"""
model_type = "mllama"
is_composition = True
sub_configs = {"text_config": MllamaTextConfig, "vision_config": MllamaVisionConfig}
def __init__(
self,

View File

@@ -235,8 +235,8 @@ class MoshiConfig(PretrainedConfig):
```"""
model_type = "moshi"
is_composition = True
keys_to_ignore_at_inference = ["past_key_values"]
sub_configs = {"audio_encoder_config": AutoConfig}
def __init__(
self,

View File

@@ -41,22 +41,22 @@ class MptAttentionConfig(PretrainedConfig):
Args:
attn_type (`str`, *optional*, defaults to `"multihead_attention"`):
type of attention to use. Options: `"multihead_attention"`, `"multiquery_attention"`.
attn_pdrop (`float`, *optional*, defaults to 0.0):
attn_pdrop (`float`, *optional*, defaults to `0.0`):
The dropout probability for the attention layers.
attn_impl (`str`, *optional*, defaults to `"torch"`):
The attention implementation to use. One of `"torch"`, `"flash"`, or `"triton"`.
clip_qkv (`float`, *optional*):
If not `None`, clip the queries, keys, and values in the attention layer to this value.
softmax_scale (`float`, *optional*, defaults to `None`):
softmax_scale (`float`, *optional*):
If not `None`, scale the softmax in the attention layer by this value. If `None`, will default to
`1/sqrt(hidden_size)`.
prefix_lm (`bool`, *optional*, defaults to `False`)):
prefix_lm (`bool`, *optional*, defaults to `False`):
Whether the model should operate as a Prefix LM. This requires passing an extra `prefix_mask` argument
which indicates which tokens belong to the prefix. Tokens in the prefix can attend to one another
bi-directionally. Tokens outside the prefix use causal attention.
qk_ln (`bool`, *optional*, defaults to `False`):
Whether to apply layer normalization to the queries and keys in the attention layer.
attn_uses_sequence_id (`bool`, *optional*, defaults to `False`)):
attn_uses_sequence_id (`bool`, *optional*, defaults to `False`):
Whether to restrict attention to tokens that have the same token_type_ids. When the model is in `train`
mode, this requires passing an extra *token_type_ids* argument which indicates which sub-sequence each
token belongs to. Defaults to `False` meaning any provided *token_type_ids* will be ignored.
@@ -66,6 +66,8 @@ class MptAttentionConfig(PretrainedConfig):
The maximum value of the alibi bias.
"""
base_config_key = "attn_config"
def __init__(
self,
attn_type="multihead_attention",
@@ -97,23 +99,6 @@ class MptAttentionConfig(PretrainedConfig):
f"`attn_type` has to be either `multihead_attention` or `multiquery_attention`. Received: {attn_type}"
)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mpt":
config_dict = config_dict["attn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MptConfig(PretrainedConfig):
"""
@@ -188,6 +173,7 @@ class MptConfig(PretrainedConfig):
"""
model_type = "mpt"
sub_configs = {"attn_config": MptAttentionConfig}
attribute_map = {
"num_attention_heads": "n_heads",
"hidden_size": "d_model",

View File

@@ -76,6 +76,7 @@ class MusicgenDecoderConfig(PretrainedConfig):
"""
model_type = "musicgen_decoder"
base_config_key = "decoder_config"
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(
@@ -189,6 +190,11 @@ class MusicgenConfig(PretrainedConfig):
```"""
model_type = "musicgen"
sub_configs = {
"text_encoder": AutoConfig,
"audio_encoder": AutoConfig,
"decoder": MusicgenDecoderConfig,
}
is_composition = True
def __init__(self, **kwargs):

View File

@@ -78,6 +78,7 @@ class MusicgenMelodyDecoderConfig(PretrainedConfig):
"""
model_type = "musicgen_melody_decoder"
base_config_key = "decoder_config"
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(
@@ -195,6 +196,11 @@ class MusicgenMelodyConfig(PretrainedConfig):
```"""
model_type = "musicgen_melody"
sub_configs = {
"text_encoder": AutoConfig,
"audio_encoder": AutoConfig,
"decoder": MusicgenMelodyDecoderConfig,
}
is_composition = True
def __init__(

View File

@@ -14,8 +14,7 @@
# limitations under the License.
"""OWLv2 model configuration"""
import os
from typing import TYPE_CHECKING, Dict, Union
from typing import TYPE_CHECKING, Dict
if TYPE_CHECKING:
@@ -90,6 +89,7 @@ class Owlv2TextConfig(PretrainedConfig):
```"""
model_type = "owlv2_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -123,24 +123,6 @@ class Owlv2TextConfig(PretrainedConfig):
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from Owlv2Config
if config_dict.get("model_type") == "owlv2":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2, 32->16
class Owlv2VisionConfig(PretrainedConfig):
@@ -197,6 +179,7 @@ class Owlv2VisionConfig(PretrainedConfig):
```"""
model_type = "owlv2_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -229,24 +212,6 @@ class Owlv2VisionConfig(PretrainedConfig):
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Owlv2Config
if config_dict.get("model_type") == "owlv2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
class Owlv2Config(PretrainedConfig):
@@ -276,6 +241,7 @@ class Owlv2Config(PretrainedConfig):
"""
model_type = "owlv2"
sub_configs = {"text_config": Owlv2TextConfig, "vision_config": Owlv2VisionConfig}
def __init__(
self,
@@ -304,20 +270,6 @@ class Owlv2Config(PretrainedConfig):
self.return_dict = return_dict
self.initializer_factor = 1.0
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@classmethod
def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs):
r"""

View File

@@ -14,9 +14,8 @@
# limitations under the License.
"""OWL-ViT model configuration"""
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional
if TYPE_CHECKING:
@@ -92,6 +91,7 @@ class OwlViTTextConfig(PretrainedConfig):
```"""
model_type = "owlvit_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -125,24 +125,6 @@ class OwlViTTextConfig(PretrainedConfig):
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from OwlViTConfig
if config_dict.get("model_type") == "owlvit":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class OwlViTVisionConfig(PretrainedConfig):
r"""
@@ -198,6 +180,7 @@ class OwlViTVisionConfig(PretrainedConfig):
```"""
model_type = "owlvit_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -230,24 +213,6 @@ class OwlViTVisionConfig(PretrainedConfig):
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from OwlViTConfig
if config_dict.get("model_type") == "owlvit":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class OwlViTConfig(PretrainedConfig):
r"""
@@ -276,6 +241,7 @@ class OwlViTConfig(PretrainedConfig):
"""
model_type = "owlvit"
sub_configs = {"text_config": OwlViTTextConfig, "vision_config": OwlViTVisionConfig}
def __init__(
self,
@@ -304,20 +270,6 @@ class OwlViTConfig(PretrainedConfig):
self.return_dict = return_dict
self.initializer_factor = 1.0
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@classmethod
def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs):
r"""

View File

@@ -17,7 +17,7 @@ import warnings
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -73,7 +73,7 @@ class PaliGemmaConfig(PretrainedConfig):
```"""
model_type = "paligemma"
is_composition = False
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -157,7 +157,7 @@ class Qwen2AudioConfig(PretrainedConfig):
```"""
model_type = "qwen2_audio"
is_composition = False
sub_configs = {"text_config": AutoConfig, "audio_config": AutoConfig}
def __init__(
self,

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""Qwen2VL model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation
from ...utils import logging
@@ -27,6 +24,7 @@ logger = logging.get_logger(__name__)
class Qwen2VLVisionConfig(PretrainedConfig):
model_type = "qwen2_vl"
base_config_key = "vision_config"
def __init__(
self,
@@ -55,23 +53,6 @@ class Qwen2VLVisionConfig(PretrainedConfig):
self.spatial_merge_size = spatial_merge_size
self.temporal_patch_size = temporal_patch_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "qwen2_vl":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Qwen2VLConfig(PretrainedConfig):
r"""
@@ -180,6 +161,7 @@ class Qwen2VLConfig(PretrainedConfig):
```"""
model_type = "qwen2_vl"
sub_configs = {"vision_config": Qwen2VLVisionConfig}
keys_to_ignore_at_inference = ["past_key_values"]
def __init__(

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""Siglip model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -79,6 +76,7 @@ class SiglipTextConfig(PretrainedConfig):
```"""
model_type = "siglip_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -110,24 +108,6 @@ class SiglipTextConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from SiglipConfig
if config_dict.get("model_type") == "siglip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class SiglipVisionConfig(PretrainedConfig):
r"""
@@ -178,6 +158,7 @@ class SiglipVisionConfig(PretrainedConfig):
```"""
model_type = "siglip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -206,24 +187,6 @@ class SiglipVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from SiglipConfig
if config_dict.get("model_type") == "siglip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class SiglipConfig(PretrainedConfig):
r"""
@@ -268,6 +231,7 @@ class SiglipConfig(PretrainedConfig):
```"""
model_type = "siglip"
sub_configs = {"text_config": SiglipTextConfig, "vision_config": SiglipVisionConfig}
def __init__(self, text_config=None, vision_config=None, **kwargs):
super().__init__(**kwargs)

View File

@@ -71,6 +71,7 @@ class SpeechEncoderDecoderConfig(PretrainedConfig):
```"""
model_type = "speech-encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True
def __init__(self, **kwargs):

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -78,7 +78,7 @@ class VideoLlavaConfig(PretrainedConfig):
```"""
model_type = "video_llava"
is_composition = False
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING
from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__)
@@ -72,7 +72,7 @@ class VipLlavaConfig(PretrainedConfig):
```"""
model_type = "vipllava"
is_composition = False
sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__(
self,

View File

@@ -78,6 +78,7 @@ class VisionEncoderDecoderConfig(PretrainedConfig):
```"""
model_type = "vision-encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True
def __init__(self, **kwargs):

View File

@@ -75,6 +75,7 @@ class VisionTextDualEncoderConfig(PretrainedConfig):
```"""
model_type = "vision-text-dual-encoder"
sub_configs = {"vision_config": AutoConfig, "text_config": AutoConfig}
is_composition = True
def __init__(self, projection_dim=512, logit_scale_init_value=2.6592, **kwargs):

View File

@@ -14,9 +14,6 @@
# limitations under the License.
"""X-CLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig
from ...utils import logging
@@ -79,6 +76,7 @@ class XCLIPTextConfig(PretrainedConfig):
```"""
model_type = "xclip_text_model"
base_config_key = "text_config"
def __init__(
self,
@@ -112,24 +110,6 @@ class XCLIPTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from XCLIPConfig
if config_dict.get("model_type") == "xclip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class XCLIPVisionConfig(PretrainedConfig):
r"""
@@ -195,6 +175,7 @@ class XCLIPVisionConfig(PretrainedConfig):
```"""
model_type = "xclip_vision_model"
base_config_key = "vision_config"
def __init__(
self,
@@ -239,24 +220,6 @@ class XCLIPVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act
self.drop_path_rate = drop_path_rate
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from XCLIPConfig
if config_dict.get("model_type") == "xclip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class XCLIPConfig(PretrainedConfig):
r"""
@@ -295,6 +258,7 @@ class XCLIPConfig(PretrainedConfig):
"""
model_type = "xclip"
sub_configs = {"text_config": XCLIPTextConfig, "vision_config": XCLIPVisionConfig}
def __init__(
self,

View File

@@ -457,11 +457,20 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = AlignModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=AlignConfig,
has_text_modality=False,
common_properties=["projection_dim", "temperature_init_value"],
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Start to fail after using torch `cu118`.")
def test_multi_gpu_data_parallel_forward(self):
super().test_multi_gpu_data_parallel_forward()

View File

@@ -452,11 +452,20 @@ class AltCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self):
self.model_tester = AltCLIPModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=AltCLIPConfig,
has_text_modality=False,
common_properties=["projection_dim", "logit_scale_init_value"],
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -449,11 +449,18 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = BlipModelTester(self)
common_properties = ["logit_scale_init_value", "image_text_hidden_size", "projection_dim", "label_smoothing"]
self.config_tester = ConfigTester(
self, config_class=BlipConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -482,6 +482,13 @@ class Blip2ForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationT
def setUp(self):
self.model_tester = Blip2ForConditionalGenerationDecoderOnlyModelTester(self)
common_properties = ["image_token_index", "num_query_tokens", "image_text_hidden_size"]
self.config_tester = ConfigTester(
self, config_class=Blip2Config, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -515,11 +515,18 @@ class ClapModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = ClapModelTester(self)
common_properties = ["logit_scale_init_value", "projection_hidden_act", "projection_dim"]
self.config_tester = ConfigTester(
self, config_class=ClapConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -745,11 +745,18 @@ class CLIPModelTest(CLIPModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def setUp(self):
self.model_tester = CLIPModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=CLIPConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -472,11 +472,18 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self):
self.model_tester = CLIPSegModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=CLIPSegConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
def test_model_for_image_segmentation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_for_image_segmentation(*config_and_inputs)

View File

@@ -414,7 +414,13 @@ class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase)
def setUp(self):
self.model_tester = ClvpModelForConditionalGenerationTester(self)
self.clvp_config_tester = ConfigTester(self, config_class=ClvpConfig, hidden_size=32)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.clvp_config_tester = ConfigTester(
self, config_class=ClvpConfig, has_text_modality=False, common_properties=common_properties, hidden_size=32
)
def test_config(self):
self.clvp_config_tester.run_common_tests()
def tearDown(self):
super().tearDown()

View File

@@ -931,11 +931,18 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = self.class_for_tester(self)
common_properties = ["projection_dim", "logit_scale_init_value", "init_codebook"]
self.config_tester = ConfigTester(
self, config_class=FlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -559,11 +559,18 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def setUp(self):
self.model_tester = GroupViTModelTester(self)
common_properties = ["projection_dim", "projection_intermediate_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=GroupViTConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="hidden_states are tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -185,7 +185,12 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = Idefics2VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
self.config_tester = ConfigTester(
self, config_class=Idefics2Config, has_text_modality=False, common_properties=["image_token_id"]
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds():

View File

@@ -168,7 +168,12 @@ class Idefics3ModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = Idefics3VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics3Config, has_text_modality=False)
self.config_tester = ConfigTester(
self, config_class=Idefics3Config, has_text_modality=False, common_properties=["image_token_id"]
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds():

View File

@@ -486,6 +486,15 @@ class InstructBlipForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, Gene
def setUp(self):
self.model_tester = InstructBlipForConditionalGenerationDecoderOnlyModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=InstructBlipConfig,
has_text_modality=False,
common_properties=["num_query_tokens", "image_token_index"],
)
def test_config(self):
self.config_tester.run_common_tests()
def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -510,11 +510,18 @@ class InstructBlipVideoForConditionalGenerationDecoderOnlyTest(
def setUp(self):
self.model_tester = InstructBlipVideoForConditionalGenerationDecoderOnlyModelTester(self)
common_properties = ["num_query_tokens", "video_token_index"]
self.config_tester = ConfigTester(
self, config_class=InstructBlipVideoConfig, has_text_modality=False, common_properties=common_properties
)
def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_conditional_generation(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self):
pass

View File

@@ -304,7 +304,12 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self):
self.model_tester = Kosmos2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=Kosmos2Config, hidden_size=37)
self.config_tester = ConfigTester(
self, config_class=Kosmos2Config, has_text_modality=False, common_properties=["latent_query_num"]
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite from common to skip `image_to_text_projection.latent_query`
def test_initialization(self):

View File

@@ -194,7 +194,13 @@ class LlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterM
def setUp(self):
self.model_tester = LlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaConfig, has_text_modality=False)
common_properties = ["image_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self):

View File

@@ -223,7 +223,13 @@ class LlavaNextForConditionalGenerationModelTest(ModelTesterMixin, GenerationTes
def setUp(self):
self.model_tester = LlavaNextVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaNextConfig, has_text_modality=False)
common_properties = ["image_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaNextConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -240,7 +240,13 @@ class LlavaNextVideoForConditionalGenerationModelTest(ModelTesterMixin, Generati
def setUp(self):
self.model_tester = LlavaNextVideoVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaNextVideoConfig, has_text_modality=False)
common_properties = ["image_token_index", "video_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaNextVideoConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -226,7 +226,13 @@ class LlavaOnevisionForConditionalGenerationModelTest(ModelTesterMixin, Generati
def setUp(self):
self.model_tester = LlavaOnevisionVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaOnevisionConfig, has_text_modality=False)
common_properties = ["image_token_index", "video_token_index", "vision_feature_layer"]
self.config_tester = ConfigTester(
self, config_class=LlavaOnevisionConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -272,7 +272,12 @@ class MllamaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTester
def setUp(self):
self.model_tester = MllamaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=MllamaConfig, has_text_modality=False)
self.config_tester = ConfigTester(
self, config_class=MllamaConfig, has_text_modality=False, common_properties=["image_token_index"]
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self):

View File

@@ -447,6 +447,13 @@ class Owlv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = Owlv2ModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=Owlv2Config, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -442,6 +442,13 @@ class OwlViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = OwlViTModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=OwlViTConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -232,6 +232,9 @@ class Qwen2VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas
self.model_tester = Qwen2VLVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Qwen2VLConfig, has_text_modality=False)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -667,9 +667,12 @@ class SiglipModelTest(SiglipModelTesterMixin, PipelineTesterMixin, unittest.Test
test_disk_offload_bin = False
_is_composite = True
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.setUp with CLIP->Siglip
def setUp(self):
self.model_tester = SiglipModelTester(self)
self.config_tester = ConfigTester(self, config_class=SiglipConfig, has_text_modality=False)
def test_config(self):
self.config_tester.run_common_tests()
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.test_model
def test_model(self):

View File

@@ -217,7 +217,13 @@ class VideoLlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTe
def setUp(self):
self.model_tester = VideoLlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=VideoLlavaConfig, has_text_modality=False)
common_properties = ["image_token_index", "video_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=VideoLlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(
reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"

View File

@@ -179,7 +179,13 @@ class VipLlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTest
def setUp(self):
self.model_tester = VipLlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=VipLlavaConfig, has_text_modality=False)
common_properties = ["image_token_index", "vision_feature_layers", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=VipLlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self):

View File

@@ -547,6 +547,13 @@ class XCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self):
self.model_tester = XCLIPModelTester(self)
common_properties = ["projection_dim", "prompt_layers", "prompt_num_attention_heads"]
self.config_tester = ConfigTester(
self, config_class=XCLIPConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -17,12 +17,17 @@ import copy
import json
import os
import tempfile
from pathlib import Path
from transformers import is_torch_available
from transformers.utils import direct_transformers_import
from .utils.test_configuration_utils import config_common_kwargs
transformers_module = direct_transformers_import(Path(__file__).parent)
class ConfigTester:
def __init__(self, parent, config_class=None, has_text_modality=True, common_properties=None, **kwargs):
self.parent = parent
@@ -35,9 +40,10 @@ class ConfigTester:
config = self.config_class(**self.inputs_dict)
common_properties = (
["hidden_size", "num_attention_heads", "num_hidden_layers"]
if self.common_properties is None
if self.common_properties is None and not self.config_class.sub_configs
else self.common_properties
)
common_properties = [] if common_properties is None else common_properties
# Add common fields for text models
if self.has_text_modality:
@@ -110,6 +116,44 @@ class ConfigTester:
self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())
def create_and_test_config_from_and_save_pretrained_composite(self):
"""
Tests that composite or nested cofigs can be loaded and saved correctly. In case the config
has a sub-config, we should be able to call `sub_config.from_pretrained('general_config_file')`
and get a result same as if we loaded the whole config and obtained `config.sub_config` from it.
"""
config = self.config_class(**self.inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
config.save_pretrained(tmpdirname)
general_config_loaded = self.config_class.from_pretrained(tmpdirname)
general_config_dict = config.to_dict()
# Iterate over all sub_configs if there are any and load them with their own classes
sub_configs = self.config_class.sub_configs
for sub_config_key, sub_class in sub_configs.items():
if sub_class.__name__ == "AutoConfig":
sub_class = sub_class.for_model(**general_config_dict[sub_config_key]).__class__
sub_config_loaded = sub_class.from_pretrained(tmpdirname)
else:
sub_config_loaded = sub_class.from_pretrained(tmpdirname)
# Pop `transformers_version`, it never exists when a config is part of a general composite config
# Verify that loading with subconfig class results in same dict as if we loaded with general composite config class
sub_config_loaded_dict = sub_config_loaded.to_dict()
sub_config_loaded_dict.pop("transformers_version", None)
self.parent.assertEqual(sub_config_loaded_dict, general_config_dict[sub_config_key])
# Verify that the loaded config type is same as in the general config
type_from_general_config = type(getattr(general_config_loaded, sub_config_key))
self.parent.assertTrue(isinstance(sub_config_loaded, type_from_general_config))
# Now save only the sub-config and load it back to make sure the whole load-save-load pipeline works
with tempfile.TemporaryDirectory() as tmpdirname2:
sub_config_loaded.save_pretrained(tmpdirname2)
sub_config_loaded_2 = sub_class.from_pretrained(tmpdirname2)
self.parent.assertEqual(sub_config_loaded.to_dict(), sub_config_loaded_2.to_dict())
def create_and_test_config_with_num_labels(self):
config = self.config_class(**self.inputs_dict, num_labels=5)
self.parent.assertEqual(len(config.id2label), 5)
@@ -128,6 +172,9 @@ class ConfigTester:
self.parent.assertIsNotNone(config)
def check_config_arguments_init(self):
if self.config_class.sub_configs:
return # TODO: @raushan composite models are not consistent in how they set general params
kwargs = copy.deepcopy(config_common_kwargs)
config = self.config_class(**kwargs)
wrong_values = []
@@ -153,6 +200,7 @@ class ConfigTester:
self.create_and_test_config_to_json_file()
self.create_and_test_config_from_and_save_pretrained()
self.create_and_test_config_from_and_save_pretrained_subfolder()
self.create_and_test_config_from_and_save_pretrained_composite()
self.create_and_test_config_with_num_labels()
self.check_config_can_be_init_without_params()
self.check_config_arguments_init()

View File

@@ -3802,22 +3802,18 @@ class ModelTesterMixin:
self.skipTest("Model is not a composite model.")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
sub_configs = {
key: getattr(config, key) for key in config if isinstance(getattr(config, key), PretrainedConfig)
}
# set eager as it will be the one supported in all models
# we just need to test if passing 'attn_implementation' as a dict fails or not
attn_implementation_per_subconfig = {}
for key, sub_config in sub_configs.items():
for key in config.sub_configs.keys():
attn_implementation_per_subconfig[key] = "eager"
config._attn_implementation = attn_implementation_per_subconfig
model = model_class(config)
for key in model.config:
if isinstance(getattr(model.config, key), PretrainedConfig):
sub_config = getattr(model.config, key)
self.assertTrue(sub_config._attn_implementation == "eager")
for key in config.sub_configs.keys():
sub_config = getattr(model.config, key)
self.assertTrue(sub_config._attn_implementation == "eager")
for name, submodule in model.named_modules():
class_name = submodule.__class__.__name__
@@ -3826,7 +3822,7 @@ class ModelTesterMixin:
or "SdpaSelfAttention" in class_name
or "FlashAttention" in class_name
):
raise ValueError("The eager model should not have SDPA/FA2 attention layers")
raise ValueError(f"The eager model should not have SDPA/FA2 attention layers but got {class_name}")
@require_torch_sdpa
def test_sdpa_can_dispatch_non_composite_models(self):