Load sub-configs from composite configs (#34410)

* save/load sub-configs

* nit forgot these

* fix copies

* move test to common

* use dict for sub-configs

* add load-save-load test

* clean up modeling check

* oops these are correct keys

* fix some tests, missed some composite configs

* this model was missed
This commit is contained in:
Raushan Turganbay
2024-11-05 11:34:01 +01:00
committed by GitHub
parent 5e1fd4e204
commit 893ad04fad
78 changed files with 464 additions and 1052 deletions

View File

@@ -190,6 +190,8 @@ class PretrainedConfig(PushToHubMixin):
""" """
model_type: str = "" model_type: str = ""
base_config_key: str = ""
sub_configs: Dict[str, "PretrainedConfig"] = {}
is_composition: bool = False is_composition: bool = False
attribute_map: Dict[str, str] = {} attribute_map: Dict[str, str] = {}
_auto_class: Optional[str] = None _auto_class: Optional[str] = None
@@ -543,11 +545,22 @@ class PretrainedConfig(PushToHubMixin):
cls._set_token_in_kwargs(kwargs, token) cls._set_token_in_kwargs(kwargs, token)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs) config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if cls.base_config_key and cls.base_config_key in config_dict:
config_dict = config_dict[cls.base_config_key]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type: if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning( # sometimes the config has no `base_config_key` if the config is used in several composite models
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type " # e.g. LlamaConfig. In that case we try to see if there is a match in `model_type` before raising a warning
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors." for k, v in config_dict.items():
) if isinstance(v, dict) and v.get("model_type") == cls.model_type:
config_dict = v
# raise warning only if we still can't see a match in `model_type`
if config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs) return cls.from_dict(config_dict, **kwargs)

View File

@@ -1608,15 +1608,14 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
# Below we check if a config is composite and manually prepare a dict of attn impl if not already passed as a dict. # Below we check if a config is composite and manually prepare a dict of attn impl if not already passed as a dict.
# Later each sub-module will dispatch with its own attn impl, by calling `XXXModel._from_config(config.text_config)` # Later each sub-module will dispatch with its own attn impl, by calling `XXXModel._from_config(config.text_config)`
# If any of sub-modules doesn't support requested attn, an error will be raised. See https://github.com/huggingface/transformers/pull/32238 # If any of sub-modules doesn't support requested attn, an error will be raised. See https://github.com/huggingface/transformers/pull/32238
for key in config: for key in config.sub_configs.keys():
if isinstance(getattr(config, key), PretrainedConfig): sub_config = getattr(config, key)
sub_config = getattr(config, key) curr_attn_implementation = (
curr_attn_implementation = ( requested_attn_implementation
requested_attn_implementation if not isinstance(requested_attn_implementation, dict)
if not isinstance(requested_attn_implementation, dict) else requested_attn_implementation.get(key, None)
else requested_attn_implementation.get(key, None) )
) sub_config._attn_implementation_internal = curr_attn_implementation
sub_config._attn_implementation_internal = curr_attn_implementation
if use_flash_attention_2: if use_flash_attention_2:
logger.warning_once( logger.warning_once(

View File

@@ -14,8 +14,7 @@
# limitations under the License. # limitations under the License.
"""ALIGN model configuration""" """ALIGN model configuration"""
import os from typing import TYPE_CHECKING, List
from typing import TYPE_CHECKING, List, Union
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -95,6 +94,7 @@ class AlignTextConfig(PretrainedConfig):
```""" ```"""
model_type = "align_text_model" model_type = "align_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -133,24 +133,6 @@ class AlignTextConfig(PretrainedConfig):
self.use_cache = use_cache self.use_cache = use_cache
self.pad_token_id = pad_token_id self.pad_token_id = pad_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from AlignConfig
if config_dict.get("model_type") == "align":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AlignVisionConfig(PretrainedConfig): class AlignVisionConfig(PretrainedConfig):
r""" r"""
@@ -223,6 +205,7 @@ class AlignVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "align_vision_model" model_type = "align_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -272,24 +255,6 @@ class AlignVisionConfig(PretrainedConfig):
self.drop_connect_rate = drop_connect_rate self.drop_connect_rate = drop_connect_rate
self.num_hidden_layers = sum(num_block_repeats) * 4 self.num_hidden_layers = sum(num_block_repeats) * 4
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from AlignConfig
if config_dict.get("model_type") == "align":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AlignConfig(PretrainedConfig): class AlignConfig(PretrainedConfig):
r""" r"""
@@ -340,6 +305,7 @@ class AlignConfig(PretrainedConfig):
```""" ```"""
model_type = "align" model_type = "align"
sub_configs = {"text_config": AlignTextConfig, "vision_config": AlignVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""AltCLIP model configuration""" """AltCLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -199,6 +196,7 @@ class AltCLIPVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "altclip_vision_model" model_type = "altclip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -233,24 +231,6 @@ class AltCLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from AltCLIPConfig
if config_dict.get("model_type") == "altclip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class AltCLIPConfig(PretrainedConfig): class AltCLIPConfig(PretrainedConfig):
r""" r"""
@@ -298,6 +278,7 @@ class AltCLIPConfig(PretrainedConfig):
```""" ```"""
model_type = "altclip" model_type = "altclip"
sub_configs = {"text_config": AltCLIPTextConfig, "vision_config": AltCLIPVisionConfig}
def __init__( def __init__(
self, text_config=None, vision_config=None, projection_dim=768, logit_scale_init_value=2.6592, **kwargs self, text_config=None, vision_config=None, projection_dim=768, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,12 +14,11 @@
# limitations under the License. # limitations under the License.
"""BARK model configuration""" """BARK model configuration"""
import os from typing import Dict
from typing import Dict, Optional, Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import add_start_docstrings, logging from ...utils import add_start_docstrings, logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -64,7 +63,6 @@ BARK_SUBMODELCONFIG_START_DOCSTRING = """
class BarkSubModelConfig(PretrainedConfig): class BarkSubModelConfig(PretrainedConfig):
model_type = "bark_module"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = { attribute_map = {
@@ -101,38 +99,6 @@ class BarkSubModelConfig(PretrainedConfig):
super().__init__(**kwargs) super().__init__(**kwargs)
@classmethod
def from_pretrained(
cls,
pretrained_model_name_or_path: Union[str, os.PathLike],
cache_dir: Optional[Union[str, os.PathLike]] = None,
force_download: bool = False,
local_files_only: bool = False,
token: Optional[Union[str, bool]] = None,
revision: str = "main",
**kwargs,
) -> "PretrainedConfig":
kwargs["cache_dir"] = cache_dir
kwargs["force_download"] = force_download
kwargs["local_files_only"] = local_files_only
kwargs["revision"] = revision
cls._set_token_in_kwargs(kwargs, token)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the config dict if we are loading from Bark
if config_dict.get("model_type") == "bark":
config_dict = config_dict[f"{cls.model_type}_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@add_start_docstrings( @add_start_docstrings(
BARK_SUBMODELCONFIG_START_DOCSTRING.format(config="BarkSemanticConfig", model="BarkSemanticModel"), BARK_SUBMODELCONFIG_START_DOCSTRING.format(config="BarkSemanticConfig", model="BarkSemanticModel"),
@@ -154,6 +120,7 @@ class BarkSubModelConfig(PretrainedConfig):
) )
class BarkSemanticConfig(BarkSubModelConfig): class BarkSemanticConfig(BarkSubModelConfig):
model_type = "semantic" model_type = "semantic"
base_config_key = "semantic_config"
@add_start_docstrings( @add_start_docstrings(
@@ -176,6 +143,7 @@ class BarkSemanticConfig(BarkSubModelConfig):
) )
class BarkCoarseConfig(BarkSubModelConfig): class BarkCoarseConfig(BarkSubModelConfig):
model_type = "coarse_acoustics" model_type = "coarse_acoustics"
base_config_key = "coarse_acoustics_config"
@add_start_docstrings( @add_start_docstrings(
@@ -203,6 +171,7 @@ class BarkCoarseConfig(BarkSubModelConfig):
) )
class BarkFineConfig(BarkSubModelConfig): class BarkFineConfig(BarkSubModelConfig):
model_type = "fine_acoustics" model_type = "fine_acoustics"
base_config_key = "fine_acoustics_config"
def __init__(self, tie_word_embeddings=True, n_codes_total=8, n_codes_given=1, **kwargs): def __init__(self, tie_word_embeddings=True, n_codes_total=8, n_codes_given=1, **kwargs):
self.n_codes_total = n_codes_total self.n_codes_total = n_codes_total
@@ -265,6 +234,12 @@ class BarkConfig(PretrainedConfig):
""" """
model_type = "bark" model_type = "bark"
sub_configs = {
"semantic_config": BarkSemanticConfig,
"coarse_acoustics_config": BarkCoarseConfig,
"fine_acoustics_config": BarkFineConfig,
"codec_config": AutoConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""Blip model configuration""" """Blip model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -96,6 +93,7 @@ class BlipTextConfig(PretrainedConfig):
```""" ```"""
model_type = "blip_text_model" model_type = "blip_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -146,24 +144,6 @@ class BlipTextConfig(PretrainedConfig):
self.use_cache = use_cache self.use_cache = use_cache
self.label_smoothing = label_smoothing self.label_smoothing = label_smoothing
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from BlipConfig
if config_dict.get("model_type") == "blip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BlipVisionConfig(PretrainedConfig): class BlipVisionConfig(PretrainedConfig):
r""" r"""
@@ -215,6 +195,7 @@ class BlipVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "blip_vision_model" model_type = "blip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -245,24 +226,6 @@ class BlipVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from BlipConfig
if config_dict.get("model_type") == "blip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BlipConfig(PretrainedConfig): class BlipConfig(PretrainedConfig):
r""" r"""
@@ -316,6 +279,7 @@ class BlipConfig(PretrainedConfig):
```""" ```"""
model_type = "blip" model_type = "blip"
sub_configs = {"text_config": BlipTextConfig, "vision_config": BlipVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,13 +14,12 @@
# limitations under the License. # limitations under the License.
"""BLIP-2 model configuration""" """BLIP-2 model configuration"""
import os from typing import Optional
from typing import Optional, Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -76,6 +75,7 @@ class Blip2VisionConfig(PretrainedConfig):
```""" ```"""
model_type = "blip_2_vision_model" model_type = "blip_2_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -106,24 +106,6 @@ class Blip2VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.qkv_bias = qkv_bias self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Blip2Config
if config_dict.get("model_type") == "blip-2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Blip2QFormerConfig(PretrainedConfig): class Blip2QFormerConfig(PretrainedConfig):
r""" r"""
@@ -190,6 +172,7 @@ class Blip2QFormerConfig(PretrainedConfig):
```""" ```"""
model_type = "blip_2_qformer" model_type = "blip_2_qformer"
base_config_key = "qformer_config"
def __init__( def __init__(
self, self,
@@ -229,24 +212,6 @@ class Blip2QFormerConfig(PretrainedConfig):
self.encoder_hidden_size = encoder_hidden_size self.encoder_hidden_size = encoder_hidden_size
self.use_qformer_text_input = use_qformer_text_input self.use_qformer_text_input = use_qformer_text_input
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from Blip2Config
if config_dict.get("model_type") == "blip-2":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Blip2Config(PretrainedConfig): class Blip2Config(PretrainedConfig):
r""" r"""
@@ -306,6 +271,7 @@ class Blip2Config(PretrainedConfig):
```""" ```"""
model_type = "blip-2" model_type = "blip-2"
sub_configs = {"text_config": AutoConfig, "qformer_config": Blip2QFormerConfig, "vision_config": Blip2VisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""BridgeTower model configuration""" """BridgeTower model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -68,6 +65,7 @@ class BridgeTowerVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "bridgetower_vision_model" model_type = "bridgetower_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -95,21 +93,6 @@ class BridgeTowerVisionConfig(PretrainedConfig):
self.share_layernorm = share_layernorm self.share_layernorm = share_layernorm
self.remove_last_layer = remove_last_layer self.remove_last_layer = remove_last_layer
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "bridgetower":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BridgeTowerTextConfig(PretrainedConfig): class BridgeTowerTextConfig(PretrainedConfig):
r""" r"""
@@ -175,6 +158,7 @@ class BridgeTowerTextConfig(PretrainedConfig):
```""" ```"""
model_type = "bridgetower_text_model" model_type = "bridgetower_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -217,21 +201,6 @@ class BridgeTowerTextConfig(PretrainedConfig):
self.bos_token_id = bos_token_id self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id self.eos_token_id = eos_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "bridgetower":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class BridgeTowerConfig(PretrainedConfig): class BridgeTowerConfig(PretrainedConfig):
r""" r"""
@@ -288,6 +257,7 @@ class BridgeTowerConfig(PretrainedConfig):
```""" ```"""
model_type = "bridgetower" model_type = "bridgetower"
sub_configs = {"text_config": BridgeTowerTextConfig, "vision_config": BridgeTowerVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -62,6 +62,7 @@ class ChameleonVQVAEConfig(PretrainedConfig):
""" """
model_type = "chameleon_vqgan" model_type = "chameleon_vqgan"
base_config_key = "vq_config"
def __init__( def __init__(
self, self,
@@ -187,6 +188,7 @@ class ChameleonConfig(PretrainedConfig):
```""" ```"""
model_type = "chameleon" model_type = "chameleon"
sub_configs = {"vq_config": ChameleonVQVAEConfig}
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
def __init__( def __init__(

View File

@@ -14,9 +14,8 @@
# limitations under the License. # limitations under the License.
"""Chinese-CLIP model configuration""" """Chinese-CLIP model configuration"""
import os
from collections import OrderedDict from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -102,6 +101,7 @@ class ChineseCLIPTextConfig(PretrainedConfig):
```""" ```"""
model_type = "chinese_clip_text_model" model_type = "chinese_clip_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -141,24 +141,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
self.position_embedding_type = position_embedding_type self.position_embedding_type = position_embedding_type
self.use_cache = use_cache self.use_cache = use_cache
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from ChineseCLIPConfig
if config_dict.get("model_type") == "chinese_clip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ChineseCLIPVisionConfig(PretrainedConfig): class ChineseCLIPVisionConfig(PretrainedConfig):
r""" r"""
@@ -215,6 +197,7 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "chinese_clip_vision_model" model_type = "chinese_clip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -249,24 +232,6 @@ class ChineseCLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from ChineseCLIPConfig
if config_dict.get("model_type") == "chinese_clip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ChineseCLIPConfig(PretrainedConfig): class ChineseCLIPConfig(PretrainedConfig):
r""" r"""
@@ -316,6 +281,7 @@ class ChineseCLIPConfig(PretrainedConfig):
```""" ```"""
model_type = "chinese_clip" model_type = "chinese_clip"
sub_configs = {"text_config": ChineseCLIPTextConfig, "vision_config": ChineseCLIPVisionConfig}
def __init__( def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""CLAP model configuration""" """CLAP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -94,6 +91,7 @@ class ClapTextConfig(PretrainedConfig):
```""" ```"""
model_type = "clap_text_model" model_type = "clap_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -137,24 +135,6 @@ class ClapTextConfig(PretrainedConfig):
self.projection_hidden_act = projection_hidden_act self.projection_hidden_act = projection_hidden_act
self.projection_dim = projection_dim self.projection_dim = projection_dim
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from ClapConfig
if config_dict.get("model_type") == "clap":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClapAudioConfig(PretrainedConfig): class ClapAudioConfig(PretrainedConfig):
r""" r"""
@@ -245,6 +225,7 @@ class ClapAudioConfig(PretrainedConfig):
```""" ```"""
model_type = "clap_audio_model" model_type = "clap_audio_model"
base_config_key = "audio_config"
def __init__( def __init__(
self, self,
@@ -307,24 +288,6 @@ class ClapAudioConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.projection_hidden_act = projection_hidden_act self.projection_hidden_act = projection_hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the audio config dict if we are loading from ClapConfig
if config_dict.get("model_type") == "clap":
config_dict = config_dict["audio_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClapConfig(PretrainedConfig): class ClapConfig(PretrainedConfig):
r""" r"""
@@ -377,6 +340,7 @@ class ClapConfig(PretrainedConfig):
```""" ```"""
model_type = "clap" model_type = "clap"
sub_configs = {"text_config": ClapTextConfig, "audio_config": ClapAudioConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,8 @@
# limitations under the License. # limitations under the License.
"""CLIP model configuration""" """CLIP model configuration"""
import os
from collections import OrderedDict from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -93,6 +92,7 @@ class CLIPTextConfig(PretrainedConfig):
```""" ```"""
model_type = "clip_text_model" model_type = "clip_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -130,24 +130,6 @@ class CLIPTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from CLIPConfig
if config_dict.get("model_type") == "clip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPVisionConfig(PretrainedConfig): class CLIPVisionConfig(PretrainedConfig):
r""" r"""
@@ -205,6 +187,7 @@ class CLIPVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "clip_vision_model" model_type = "clip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -239,24 +222,6 @@ class CLIPVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from CLIPConfig
if config_dict.get("model_type") == "clip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPConfig(PretrainedConfig): class CLIPConfig(PretrainedConfig):
r""" r"""
@@ -305,6 +270,7 @@ class CLIPConfig(PretrainedConfig):
```""" ```"""
model_type = "clip" model_type = "clip"
sub_configs = {"text_config": CLIPTextConfig, "vision_config": CLIPVisionConfig}
def __init__( def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""CLIPSeg model configuration""" """CLIPSeg model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -84,6 +81,7 @@ class CLIPSegTextConfig(PretrainedConfig):
```""" ```"""
model_type = "clipseg_text_model" model_type = "clipseg_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -117,24 +115,6 @@ class CLIPSegTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from CLIPSegConfig
if config_dict.get("model_type") == "clipseg":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPSegVisionConfig(PretrainedConfig): class CLIPSegVisionConfig(PretrainedConfig):
r""" r"""
@@ -190,6 +170,7 @@ class CLIPSegVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "clipseg_vision_model" model_type = "clipseg_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -222,24 +203,6 @@ class CLIPSegVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from CLIPSegConfig
if config_dict.get("model_type") == "clipseg":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class CLIPSegConfig(PretrainedConfig): class CLIPSegConfig(PretrainedConfig):
r""" r"""
@@ -306,6 +269,7 @@ class CLIPSegConfig(PretrainedConfig):
```""" ```"""
model_type = "clipseg" model_type = "clipseg"
sub_configs = {"text_config": CLIPSegTextConfig, "vision_config": CLIPSegVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -91,6 +91,7 @@ class ClvpEncoderConfig(PretrainedConfig):
```""" ```"""
model_type = "clvp_encoder" model_type = "clvp_encoder"
base_config_key = ["text_config", "speech_config"]
def __init__( def __init__(
self, self,
@@ -141,7 +142,7 @@ class ClvpEncoderConfig(PretrainedConfig):
# make sure to have the config_type be either "text_config" or "speech_config" # make sure to have the config_type be either "text_config" or "speech_config"
# this is to make sure that we can load only text or speech configs from the nested ClvpConfig. # this is to make sure that we can load only text or speech configs from the nested ClvpConfig.
if config_type not in ["text_config", "speech_config"]: if config_type not in cls.base_config_key:
raise ValueError( raise ValueError(
f"We can only load either 'text_config' or 'speech_config' but you are trying to load" f"{config_type}" f"We can only load either 'text_config' or 'speech_config' but you are trying to load" f"{config_type}"
) )
@@ -253,6 +254,7 @@ class ClvpDecoderConfig(PretrainedConfig):
```""" ```"""
model_type = "clvp_decoder" model_type = "clvp_decoder"
base_config_key = "decoder_config"
def __init__( def __init__(
self, self,
@@ -314,24 +316,6 @@ class ClvpDecoderConfig(PretrainedConfig):
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the decoder config dict if we are loading from ClvpConfig
if config_dict.get("model_type") == "clvp":
config_dict = config_dict["decoder_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class ClvpConfig(PretrainedConfig): class ClvpConfig(PretrainedConfig):
r""" r"""
@@ -386,7 +370,11 @@ class ClvpConfig(PretrainedConfig):
```""" ```"""
model_type = "clvp" model_type = "clvp"
is_composition = True sub_configs = {
"text_config": ClvpEncoderConfig,
"speech_config": ClvpEncoderConfig,
"decoder_config": ClvpDecoderConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -41,6 +41,8 @@ class DbrxAttentionConfig(PretrainedConfig):
rope_theta (`float`, *optional*, defaults to 10000.0): The base frequency for rope. rope_theta (`float`, *optional*, defaults to 10000.0): The base frequency for rope.
""" """
base_config_key = "attn_config"
def __init__( def __init__(
self, self,
attn_pdrop: float = 0.0, attn_pdrop: float = 0.0,
@@ -55,29 +57,12 @@ class DbrxAttentionConfig(PretrainedConfig):
self.kv_n_heads = kv_n_heads self.kv_n_heads = kv_n_heads
self.rope_theta = rope_theta self.rope_theta = rope_theta
for k in ["model_type"]: for k in ["model_type", "attn_implementation", "transformers_version", "_commit_hash"]:
if k in kwargs: if k in kwargs:
kwargs.pop(k) kwargs.pop(k)
if len(kwargs) != 0: if len(kwargs) != 0:
raise ValueError(f"Found unknown {kwargs=}") raise ValueError(f"Found unknown {kwargs=}")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs: Any) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "dbrx":
config_dict = config_dict["attn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class DbrxFFNConfig(PretrainedConfig): class DbrxFFNConfig(PretrainedConfig):
"""Configuration class for Dbrx FFN. """Configuration class for Dbrx FFN.
@@ -100,6 +85,8 @@ class DbrxFFNConfig(PretrainedConfig):
moe_normalize_expert_weights (`float`, *optional*, defaults to 1.0): The normalization factor for the expert weights. moe_normalize_expert_weights (`float`, *optional*, defaults to 1.0): The normalization factor for the expert weights.
""" """
base_config_key = "ffn_config"
def __init__( def __init__(
self, self,
ffn_act_fn: dict = None, ffn_act_fn: dict = None,
@@ -122,29 +109,12 @@ class DbrxFFNConfig(PretrainedConfig):
self.moe_loss_weight = moe_loss_weight self.moe_loss_weight = moe_loss_weight
self.moe_normalize_expert_weights = moe_normalize_expert_weights self.moe_normalize_expert_weights = moe_normalize_expert_weights
for k in ["model_type"]: for k in ["model_type", "attn_implementation", "transformers_version", "_commit_hash"]:
if k in kwargs: if k in kwargs:
kwargs.pop(k) kwargs.pop(k)
if len(kwargs) != 0: if len(kwargs) != 0:
raise ValueError(f"Found unknown {kwargs=}") raise ValueError(f"Found unknown {kwargs=}")
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs: Any) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "dbrx":
config_dict = config_dict["ffn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class DbrxConfig(PretrainedConfig): class DbrxConfig(PretrainedConfig):
r""" r"""
@@ -202,6 +172,7 @@ class DbrxConfig(PretrainedConfig):
""" """
model_type = "dbrx" model_type = "dbrx"
sub_configs = {"attn_config": DbrxAttentionConfig, "ffn_config": DbrxFFNConfig}
attribute_map = { attribute_map = {
"num_attention_heads": "n_heads", "num_attention_heads": "n_heads",
"hidden_size": "d_model", "hidden_size": "d_model",

View File

@@ -17,6 +17,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -70,6 +71,7 @@ class EncoderDecoderConfig(PretrainedConfig):
```""" ```"""
model_type = "encoder-decoder" model_type = "encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True is_composition = True
def __init__(self, **kwargs): def __init__(self, **kwargs):
@@ -84,8 +86,6 @@ class EncoderDecoderConfig(PretrainedConfig):
decoder_config = kwargs.pop("decoder") decoder_config = kwargs.pop("decoder")
decoder_model_type = decoder_config.pop("model_type") decoder_model_type = decoder_config.pop("model_type")
from ..auto.configuration_auto import AutoConfig
self.encoder = AutoConfig.for_model(encoder_model_type, **encoder_config) self.encoder = AutoConfig.for_model(encoder_model_type, **encoder_config)
self.decoder = AutoConfig.for_model(decoder_model_type, **decoder_config) self.decoder = AutoConfig.for_model(decoder_model_type, **decoder_config)
self.is_encoder_decoder = True self.is_encoder_decoder = True

View File

@@ -164,6 +164,7 @@ class FastSpeech2ConformerConfig(PretrainedConfig):
```""" ```"""
model_type = "fastspeech2_conformer" model_type = "fastspeech2_conformer"
base_config_key = "model_config"
attribute_map = {"num_hidden_layers": "encoder_layers", "num_attention_heads": "encoder_num_attention_heads"} attribute_map = {"num_hidden_layers": "encoder_layers", "num_attention_heads": "encoder_num_attention_heads"}
def __init__( def __init__(
@@ -377,6 +378,7 @@ class FastSpeech2ConformerHifiGanConfig(PretrainedConfig):
```""" ```"""
model_type = "hifigan" model_type = "hifigan"
base_config_key = "vocoder_config"
def __init__( def __init__(
self, self,
@@ -453,7 +455,7 @@ class FastSpeech2ConformerWithHifiGanConfig(PretrainedConfig):
""" """
model_type = "fastspeech2_conformer_with_hifigan" model_type = "fastspeech2_conformer_with_hifigan"
is_composition = True sub_configs = {"model_config": FastSpeech2ConformerConfig, "vocoder_config": FastSpeech2ConformerHifiGanConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,8 +14,7 @@
# limitations under the License. # limitations under the License.
"""FLAVA model configurations""" """FLAVA model configurations"""
import os from typing import Any, Dict
from typing import Any, Dict, Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -86,6 +85,7 @@ class FlavaImageConfig(PretrainedConfig):
```""" ```"""
model_type = "flava_image_model" model_type = "flava_image_model"
base_config_key = "image_config"
def __init__( def __init__(
self, self,
@@ -124,24 +124,6 @@ class FlavaImageConfig(PretrainedConfig):
self.mask_token = mask_token self.mask_token = mask_token
self.vocab_size = vocab_size self.vocab_size = vocab_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the image config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["image_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaTextConfig(PretrainedConfig): class FlavaTextConfig(PretrainedConfig):
r""" r"""
@@ -216,6 +198,7 @@ class FlavaTextConfig(PretrainedConfig):
```""" ```"""
model_type = "flava_text_model" model_type = "flava_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -254,24 +237,6 @@ class FlavaTextConfig(PretrainedConfig):
self.qkv_bias = qkv_bias self.qkv_bias = qkv_bias
self.pad_token_id = pad_token_id self.pad_token_id = pad_token_id
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaMultimodalConfig(PretrainedConfig): class FlavaMultimodalConfig(PretrainedConfig):
r""" r"""
@@ -327,6 +292,7 @@ class FlavaMultimodalConfig(PretrainedConfig):
```""" ```"""
model_type = "flava_multimodal_model" model_type = "flava_multimodal_model"
base_config_key = "multimodal_config"
def __init__( def __init__(
self, self,
@@ -357,27 +323,10 @@ class FlavaMultimodalConfig(PretrainedConfig):
self.qkv_bias = qkv_bias self.qkv_bias = qkv_bias
self.use_cls_token = use_cls_token self.use_cls_token = use_cls_token
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the multimodal config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["multimodal_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaImageCodebookConfig(PretrainedConfig): class FlavaImageCodebookConfig(PretrainedConfig):
model_type = "flava_image_codebook" model_type = "flava_image_codebook"
base_config_key = "image_codebook_config"
r""" r"""
[`FlavaImageCodebookConfig`] is the configuration class to store the configuration of a [`FlavaImageCodebook`]. It [`FlavaImageCodebookConfig`] is the configuration class to store the configuration of a [`FlavaImageCodebook`]. It
@@ -442,24 +391,6 @@ class FlavaImageCodebookConfig(PretrainedConfig):
self.freeze = freeze self.freeze = freeze
self.initializer_range = initializer_range self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the image codebook config dict if we are loading from FlavaConfig
if config_dict.get("model_type") == "flava":
config_dict = config_dict["image_codebook_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class FlavaConfig(PretrainedConfig): class FlavaConfig(PretrainedConfig):
r""" r"""
@@ -532,6 +463,12 @@ class FlavaConfig(PretrainedConfig):
""" """
model_type = "flava" model_type = "flava"
sub_configs = {
"text_config": FlavaTextConfig,
"image_config": FlavaImageConfig,
"multimodal_config": FlavaMultimodalConfig,
"image_codebook_config": FlavaImageCodebookConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -13,8 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -72,6 +70,7 @@ class GitVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "git_vision_model" model_type = "git_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -102,24 +101,6 @@ class GitVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from GITConfig
if config_dict.get("model_type") == "git":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GitConfig(PretrainedConfig): class GitConfig(PretrainedConfig):
r""" r"""
@@ -186,6 +167,7 @@ class GitConfig(PretrainedConfig):
```""" ```"""
model_type = "git" model_type = "git"
sub_configs = {"vision_config": GitVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,8 @@
# limitations under the License. # limitations under the License.
"""GroupViT model configuration""" """GroupViT model configuration"""
import os
from collections import OrderedDict from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union from typing import TYPE_CHECKING, Any, Mapping, Optional
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig from ...onnx import OnnxConfig
@@ -86,6 +85,7 @@ class GroupViTTextConfig(PretrainedConfig):
```""" ```"""
model_type = "groupvit_text_model" model_type = "groupvit_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -121,24 +121,6 @@ class GroupViTTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from GroupViTConfig
if config_dict.get("model_type") == "groupvit":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GroupViTVisionConfig(PretrainedConfig): class GroupViTVisionConfig(PretrainedConfig):
r""" r"""
@@ -197,6 +179,7 @@ class GroupViTVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "groupvit_vision_model" model_type = "groupvit_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -246,24 +229,6 @@ class GroupViTVisionConfig(PretrainedConfig):
self.assign_eps = assign_eps self.assign_eps = assign_eps
self.assign_mlp_ratio = assign_mlp_ratio self.assign_mlp_ratio = assign_mlp_ratio
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from GroupViTConfig
if config_dict.get("model_type") == "groupvit":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class GroupViTConfig(PretrainedConfig): class GroupViTConfig(PretrainedConfig):
r""" r"""
@@ -292,6 +257,7 @@ class GroupViTConfig(PretrainedConfig):
""" """
model_type = "groupvit" model_type = "groupvit"
sub_configs = {"text_config": GroupViTTextConfig, "vision_config": GroupViTVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -38,7 +38,7 @@ class IdeficsVisionConfig(PretrainedConfig):
documentation from [`PretrainedConfig`] for more information. documentation from [`PretrainedConfig`] for more information.
Args: Args:
hidden_size (`int`, *optional*, defaults to 768): embed_dim (`int`, *optional*, defaults to 768):
Dimensionality of the encoder layers and the pooler layer. (elsewhere referred to as `hidden_size`) Dimensionality of the encoder layers and the pooler layer. (elsewhere referred to as `hidden_size`)
image_size (`int`, *optional*, defaults to 224): image_size (`int`, *optional*, defaults to 224):
The size (resolution) of each image. The size (resolution) of each image.
@@ -50,12 +50,12 @@ class IdeficsVisionConfig(PretrainedConfig):
Number of hidden layers in the Transformer encoder. Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 16): num_attention_heads (`int`, *optional*, defaults to 16):
Number of attention heads for each attention layer in the Transformer encoder. Number of attention heads for each attention layer in the Transformer encoder.
image_num_channels (`int`, *optional*, defaults to `3`): num_channels (`int`, *optional*, defaults to 3):
Number of image channels. Number of image channels.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported. `"relu"`, `"selu"`, `"gelu_new"` and `"quick_gelu"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-5): layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0): attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
@@ -64,11 +64,9 @@ class IdeficsVisionConfig(PretrainedConfig):
initializer_factor (`float`, *optional*, defaults to 1.0): initializer_factor (`float`, *optional*, defaults to 1.0):
A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization A factor for initializing all weight matrices (should be kept to 1.0, used internally for initialization
testing). testing).
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
""" """
model_type = "idefics" model_type = "idefics_vision"
attribute_map = { attribute_map = {
"hidden_size": "embed_dim", "hidden_size": "embed_dim",
} }
@@ -119,7 +117,7 @@ class IdeficsPerceiverConfig(PretrainedConfig):
Args: Args:
use_resampler (`bool`, *optional*, defaults to `False`): use_resampler (`bool`, *optional*, defaults to `False`):
Whether or not to use the resampler Whether or not to use the resampler
resampler_n_latents (`int`, *optional*, defaults to ): resampler_n_latents (`int`, *optional*, defaults to 64):
Number of latent embeddings to resample ("compress") the input sequence to (usually < 128). Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
resampler_depth (`int`, *optional*, defaults to 6): resampler_depth (`int`, *optional*, defaults to 6):
Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3). Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (< 3).
@@ -131,7 +129,7 @@ class IdeficsPerceiverConfig(PretrainedConfig):
Whether or not to use qk layer norms in perceiver Whether or not to use qk layer norms in perceiver
""" """
model_type = "idefics" model_type = "idefics_perciever"
def __init__( def __init__(
self, self,
@@ -235,7 +233,7 @@ class IdeficsConfig(PretrainedConfig):
```""" ```"""
model_type = "idefics" model_type = "idefics"
is_composition = False sub_configs = {"perceiver_config": IdeficsPerceiverConfig, "vision_config": IdeficsVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -13,12 +13,9 @@
# limitations under the License. # limitations under the License.
"""Idefics2 model configuration""" """Idefics2 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -76,7 +73,8 @@ class Idefics2VisionConfig(PretrainedConfig):
>>> configuration = model.config >>> configuration = model.config
```""" ```"""
model_type = "idefics2" model_type = "idefics2_vision"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -107,24 +105,6 @@ class Idefics2VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.initializer_range = initializer_range self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Idefics2Config
if config_dict.get("model_type") == "idefics2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Idefics2PerceiverConfig(PretrainedConfig): class Idefics2PerceiverConfig(PretrainedConfig):
r""" r"""
@@ -152,7 +132,7 @@ class Idefics2PerceiverConfig(PretrainedConfig):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
""" """
model_type = "idefics2" model_type = "idefics2_perceiver"
def __init__( def __init__(
self, self,
@@ -220,7 +200,11 @@ class Idefics2Config(PretrainedConfig):
```""" ```"""
model_type = "idefics2" model_type = "idefics2"
is_composition = True sub_configs = {
"text_config": AutoConfig,
"perceiver_config": Idefics2PerceiverConfig,
"vision_config": Idefics2VisionConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -13,12 +13,9 @@
# limitations under the License. # limitations under the License.
"""Idefics3 model configuration""" """Idefics3 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -57,8 +54,7 @@ class Idefics3VisionConfig(PretrainedConfig):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0): attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
intializer_range (`float`, *optional*, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation for initializing all weight matrices in the model.
The standard deviation for initializing all weight matrices in the model.
Example: Example:
@@ -76,7 +72,8 @@ class Idefics3VisionConfig(PretrainedConfig):
>>> configuration = model.config >>> configuration = model.config
```""" ```"""
model_type = "idefics3" model_type = "idefics3_vision"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -107,24 +104,6 @@ class Idefics3VisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.initializer_range = initializer_range self.initializer_range = initializer_range
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Idefics3Config
if config_dict.get("model_type") == "idefics3":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Idefics3Config(PretrainedConfig): class Idefics3Config(PretrainedConfig):
r""" r"""
@@ -165,7 +144,7 @@ class Idefics3Config(PretrainedConfig):
```""" ```"""
model_type = "idefics3" model_type = "idefics3"
is_composition = True sub_configs = {"text_config": AutoConfig, "vision_config": Idefics3VisionConfig}
def __init__( def __init__(
self, self,
@@ -204,4 +183,4 @@ class Idefics3Config(PretrainedConfig):
self.text_config = text_config self.text_config = text_config
self.scale_factor = scale_factor self.scale_factor = scale_factor
super().__init__(**kwargs, tie_word_embeddings=tie_word_embeddings) super().__init__(**kwargs, pad_token_id=pad_token_id, tie_word_embeddings=tie_word_embeddings)

View File

@@ -14,13 +14,10 @@
# limitations under the License. # limitations under the License.
"""InstructBLIP model configuration""" """InstructBLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -78,6 +75,7 @@ class InstructBlipVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblip_vision_model" model_type = "instructblip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -108,24 +106,6 @@ class InstructBlipVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.qkv_bias = qkv_bias self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from InstructBlipConfig
if config_dict.get("model_type") == "instructblip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipQFormerConfig(PretrainedConfig): class InstructBlipQFormerConfig(PretrainedConfig):
r""" r"""
@@ -192,6 +172,7 @@ class InstructBlipQFormerConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblip_qformer" model_type = "instructblip_qformer"
base_config_key = "qformer_config"
def __init__( def __init__(
self, self,
@@ -229,24 +210,6 @@ class InstructBlipQFormerConfig(PretrainedConfig):
self.cross_attention_frequency = cross_attention_frequency self.cross_attention_frequency = cross_attention_frequency
self.encoder_hidden_size = encoder_hidden_size self.encoder_hidden_size = encoder_hidden_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from InstructBlipConfig
if config_dict.get("model_type") == "instructblip":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipConfig(PretrainedConfig): class InstructBlipConfig(PretrainedConfig):
r""" r"""
@@ -305,6 +268,11 @@ class InstructBlipConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblip" model_type = "instructblip"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipQFormerConfig,
"vision_config": InstructBlipVisionConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -19,13 +19,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -83,6 +81,7 @@ class InstructBlipVideoVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblipvideo_vision_model" model_type = "instructblipvideo_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -113,24 +112,6 @@ class InstructBlipVideoVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.qkv_bias = qkv_bias self.qkv_bias = qkv_bias
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from InstructBlipVideoConfig
if config_dict.get("model_type") == "instructblipvideo":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipVideoQFormerConfig(PretrainedConfig): class InstructBlipVideoQFormerConfig(PretrainedConfig):
r""" r"""
@@ -197,6 +178,7 @@ class InstructBlipVideoQFormerConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblipvideo_qformer" model_type = "instructblipvideo_qformer"
base_config_key = "qformer_config"
def __init__( def __init__(
self, self,
@@ -234,24 +216,6 @@ class InstructBlipVideoQFormerConfig(PretrainedConfig):
self.cross_attention_frequency = cross_attention_frequency self.cross_attention_frequency = cross_attention_frequency
self.encoder_hidden_size = encoder_hidden_size self.encoder_hidden_size = encoder_hidden_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the qformer config dict if we are loading from InstructBlipVideoConfig
if config_dict.get("model_type") == "instructblipvideo":
config_dict = config_dict["qformer_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class InstructBlipVideoConfig(PretrainedConfig): class InstructBlipVideoConfig(PretrainedConfig):
r""" r"""
@@ -310,6 +274,11 @@ class InstructBlipVideoConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblipvideo" model_type = "instructblipvideo"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipVideoQFormerConfig,
"vision_config": InstructBlipVideoVisionConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -32,7 +32,7 @@ from transformers.models.instructblip.modeling_instructblip import (
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from ...models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -103,6 +103,11 @@ class InstructBlipVideoConfig(PretrainedConfig):
```""" ```"""
model_type = "instructblipvideo" model_type = "instructblipvideo"
sub_configs = {
"text_config": AutoConfig,
"qformer_config": InstructBlipVideoQFormerConfig,
"vision_config": InstructBlipVideoVisionConfig,
}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""KOSMOS-2 model configuration""" """KOSMOS-2 model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -61,7 +58,7 @@ class Kosmos2TextConfig(PretrainedConfig):
layerdrop (`float`, *optional*, defaults to 0.0): layerdrop (`float`, *optional*, defaults to 0.0):
The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556)
for more details. for more details.
layer_norm_eps (`float`, *optional*, defaults to 1e-5): layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
init_std (`float`, *optional*, defaults to 0.02): init_std (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
@@ -69,9 +66,16 @@ class Kosmos2TextConfig(PretrainedConfig):
Scale embeddings by diving by sqrt(embed_dim). Scale embeddings by diving by sqrt(embed_dim).
use_cache (`bool`, *optional*, defaults to `True`): use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models). Whether or not the model should return the last key/values attentions (not used by all models).
pad_token_id (`int`, *optional*, defaults to 1):
Token id used for padding.
bos_token_id (`int`, *optional*, defaults to 0):
Token id used for beginning of string.
eos_token_id (`int`, *optional*, defaults to 2):
Token id used for end of string.
```""" ```"""
model_type = "kosmos_2_text_model" model_type = "kosmos_2_text_model"
base_config_key = "text_config"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = { attribute_map = {
"num_attention_heads": "attention_heads", "num_attention_heads": "attention_heads",
@@ -124,24 +128,6 @@ class Kosmos2TextConfig(PretrainedConfig):
self.scale_embedding = scale_embedding self.scale_embedding = scale_embedding
self.use_cache = use_cache self.use_cache = use_cache
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from Kosmos2Config
if config_dict.get("model_type") == "kosmos-2":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Kosmos2VisionConfig(PretrainedConfig): class Kosmos2VisionConfig(PretrainedConfig):
r""" r"""
@@ -171,18 +157,19 @@ class Kosmos2VisionConfig(PretrainedConfig):
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`): hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported. `"relu"`, `"selu"` and `"gelu_new"` `"quick_gelu"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-5): layer_norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
attention_dropout (`float`, *optional*, defaults to 0.0): attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities. The dropout ratio for the attention probabilities.
initializer_range (`float`, *optional*, defaults to 0.02): initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices. The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
initializer_factor (`float`, *optional*, defaults to 1): initializer_factor (`float`, *optional*, defaults to 1.0):
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
testing). testing).
```""" ```"""
model_type = "kosmos_2_vision_model" model_type = "kosmos_2_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -215,24 +202,6 @@ class Kosmos2VisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Kosmos2Config
if config_dict.get("model_type") == "kosmos-2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Kosmos2Config(PretrainedConfig): class Kosmos2Config(PretrainedConfig):
r""" r"""
@@ -267,7 +236,7 @@ class Kosmos2Config(PretrainedConfig):
```""" ```"""
model_type = "kosmos-2" model_type = "kosmos-2"
is_composition = True sub_configs = {"text_config": Kosmos2TextConfig, "vision_config": Kosmos2VisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -73,7 +73,7 @@ class LlavaConfig(PretrainedConfig):
```""" ```"""
model_type = "llava" model_type = "llava"
is_composition = True sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -78,7 +78,7 @@ class LlavaNextConfig(PretrainedConfig):
```""" ```"""
model_type = "llava_next" model_type = "llava_next"
is_composition = False sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -21,7 +21,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
class LlavaNextVideoConfig(PretrainedConfig): class LlavaNextVideoConfig(PretrainedConfig):
@@ -86,7 +86,7 @@ class LlavaNextVideoConfig(PretrainedConfig):
```""" ```"""
model_type = "llava_next_video" model_type = "llava_next_video"
is_composition = True sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -31,7 +31,7 @@ from ...configuration_utils import PretrainedConfig
from ...utils import ( from ...utils import (
logging, logging,
) )
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -99,7 +99,7 @@ class LlavaNextVideoConfig(PretrainedConfig):
```""" ```"""
model_type = "llava_next_video" model_type = "llava_next_video"
is_composition = True sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -18,7 +18,7 @@ from ...configuration_utils import PretrainedConfig
from ...utils import ( from ...utils import (
logging, logging,
) )
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -81,7 +81,7 @@ class LlavaOnevisionConfig(PretrainedConfig):
```""" ```"""
model_type = "llava_onevision" model_type = "llava_onevision"
is_composition = False sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -13,8 +13,7 @@
# limitations under the License. # limitations under the License.
"""Mllama model configuration""" """Mllama model configuration"""
import os from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation from ...modeling_rope_utils import rope_config_validation
@@ -59,7 +58,7 @@ class MllamaVisionConfig(PretrainedConfig):
The size (resolution) of each image *tile*. The size (resolution) of each image *tile*.
patch_size (`int`, *optional*, defaults to 14): patch_size (`int`, *optional*, defaults to 14):
The size (resolution) of each patch. The size (resolution) of each patch.
norm_eps (`float`, *optional*, defaults to 1e-5): norm_eps (`float`, *optional*, defaults to 1e-05):
The epsilon used by the layer normalization layers. The epsilon used by the layer normalization layers.
max_num_tiles (`int`, *optional*, defaults to 4): max_num_tiles (`int`, *optional*, defaults to 4):
Maximum number of tiles for image splitting. Maximum number of tiles for image splitting.
@@ -88,6 +87,7 @@ class MllamaVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "mllama_vision_model" model_type = "mllama_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -137,23 +137,6 @@ class MllamaVisionConfig(PretrainedConfig):
def max_aspect_ratio_id(self) -> int: def max_aspect_ratio_id(self) -> int:
return len(self.supported_aspect_ratios) return len(self.supported_aspect_ratios)
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mllama":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MllamaTextConfig(PretrainedConfig): class MllamaTextConfig(PretrainedConfig):
r""" r"""
@@ -178,12 +161,12 @@ class MllamaTextConfig(PretrainedConfig):
Number of hidden layers in the Transformer encoder. Number of hidden layers in the Transformer encoder.
num_attention_heads (`int`, *optional*, defaults to 32): num_attention_heads (`int`, *optional*, defaults to 32):
Number of attention heads for each attention layer in the Transformer encoder. Number of attention heads for each attention layer in the Transformer encoder.
num_key_value_heads (`int`, *optional*): num_key_value_heads (`int`, *optional*, defaults to 8):
This is the number of key_value heads that should be used to implement Grouped Query Attention. If not This is the number of key_value heads that should be used to implement Grouped Query Attention. If not
specified, will default to `num_attention_heads`. specified, will default to `num_attention_heads`.
intermediate_size (`int`, *optional*, defaults to 14336): intermediate_size (`int`, *optional*, defaults to 14336):
Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder. Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
rope_theta (`float`, *optional*, defaults to 500000.0): rope_theta (`float`, *optional*, defaults to `500000.0`):
The base period of the RoPE embeddings. The base period of the RoPE embeddings.
rope_scaling (`Dict`, *optional*): rope_scaling (`Dict`, *optional*):
Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
@@ -259,6 +242,7 @@ class MllamaTextConfig(PretrainedConfig):
```""" ```"""
model_type = "mllama_text_model" model_type = "mllama_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -311,23 +295,6 @@ class MllamaTextConfig(PretrainedConfig):
**kwargs, **kwargs,
) )
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mllama":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MllamaConfig(PretrainedConfig): class MllamaConfig(PretrainedConfig):
r""" r"""
@@ -370,7 +337,7 @@ class MllamaConfig(PretrainedConfig):
```""" ```"""
model_type = "mllama" model_type = "mllama"
is_composition = True sub_configs = {"text_config": MllamaTextConfig, "vision_config": MllamaVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -235,8 +235,8 @@ class MoshiConfig(PretrainedConfig):
```""" ```"""
model_type = "moshi" model_type = "moshi"
is_composition = True
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
sub_configs = {"audio_encoder_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -41,22 +41,22 @@ class MptAttentionConfig(PretrainedConfig):
Args: Args:
attn_type (`str`, *optional*, defaults to `"multihead_attention"`): attn_type (`str`, *optional*, defaults to `"multihead_attention"`):
type of attention to use. Options: `"multihead_attention"`, `"multiquery_attention"`. type of attention to use. Options: `"multihead_attention"`, `"multiquery_attention"`.
attn_pdrop (`float`, *optional*, defaults to 0.0): attn_pdrop (`float`, *optional*, defaults to `0.0`):
The dropout probability for the attention layers. The dropout probability for the attention layers.
attn_impl (`str`, *optional*, defaults to `"torch"`): attn_impl (`str`, *optional*, defaults to `"torch"`):
The attention implementation to use. One of `"torch"`, `"flash"`, or `"triton"`. The attention implementation to use. One of `"torch"`, `"flash"`, or `"triton"`.
clip_qkv (`float`, *optional*): clip_qkv (`float`, *optional*):
If not `None`, clip the queries, keys, and values in the attention layer to this value. If not `None`, clip the queries, keys, and values in the attention layer to this value.
softmax_scale (`float`, *optional*, defaults to `None`): softmax_scale (`float`, *optional*):
If not `None`, scale the softmax in the attention layer by this value. If `None`, will default to If not `None`, scale the softmax in the attention layer by this value. If `None`, will default to
`1/sqrt(hidden_size)`. `1/sqrt(hidden_size)`.
prefix_lm (`bool`, *optional*, defaults to `False`)): prefix_lm (`bool`, *optional*, defaults to `False`):
Whether the model should operate as a Prefix LM. This requires passing an extra `prefix_mask` argument Whether the model should operate as a Prefix LM. This requires passing an extra `prefix_mask` argument
which indicates which tokens belong to the prefix. Tokens in the prefix can attend to one another which indicates which tokens belong to the prefix. Tokens in the prefix can attend to one another
bi-directionally. Tokens outside the prefix use causal attention. bi-directionally. Tokens outside the prefix use causal attention.
qk_ln (`bool`, *optional*, defaults to `False`): qk_ln (`bool`, *optional*, defaults to `False`):
Whether to apply layer normalization to the queries and keys in the attention layer. Whether to apply layer normalization to the queries and keys in the attention layer.
attn_uses_sequence_id (`bool`, *optional*, defaults to `False`)): attn_uses_sequence_id (`bool`, *optional*, defaults to `False`):
Whether to restrict attention to tokens that have the same token_type_ids. When the model is in `train` Whether to restrict attention to tokens that have the same token_type_ids. When the model is in `train`
mode, this requires passing an extra *token_type_ids* argument which indicates which sub-sequence each mode, this requires passing an extra *token_type_ids* argument which indicates which sub-sequence each
token belongs to. Defaults to `False` meaning any provided *token_type_ids* will be ignored. token belongs to. Defaults to `False` meaning any provided *token_type_ids* will be ignored.
@@ -66,6 +66,8 @@ class MptAttentionConfig(PretrainedConfig):
The maximum value of the alibi bias. The maximum value of the alibi bias.
""" """
base_config_key = "attn_config"
def __init__( def __init__(
self, self,
attn_type="multihead_attention", attn_type="multihead_attention",
@@ -97,23 +99,6 @@ class MptAttentionConfig(PretrainedConfig):
f"`attn_type` has to be either `multihead_attention` or `multiquery_attention`. Received: {attn_type}" f"`attn_type` has to be either `multihead_attention` or `multiquery_attention`. Received: {attn_type}"
) )
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "mpt":
config_dict = config_dict["attn_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class MptConfig(PretrainedConfig): class MptConfig(PretrainedConfig):
""" """
@@ -188,6 +173,7 @@ class MptConfig(PretrainedConfig):
""" """
model_type = "mpt" model_type = "mpt"
sub_configs = {"attn_config": MptAttentionConfig}
attribute_map = { attribute_map = {
"num_attention_heads": "n_heads", "num_attention_heads": "n_heads",
"hidden_size": "d_model", "hidden_size": "d_model",

View File

@@ -76,6 +76,7 @@ class MusicgenDecoderConfig(PretrainedConfig):
""" """
model_type = "musicgen_decoder" model_type = "musicgen_decoder"
base_config_key = "decoder_config"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
def __init__( def __init__(
@@ -189,6 +190,11 @@ class MusicgenConfig(PretrainedConfig):
```""" ```"""
model_type = "musicgen" model_type = "musicgen"
sub_configs = {
"text_encoder": AutoConfig,
"audio_encoder": AutoConfig,
"decoder": MusicgenDecoderConfig,
}
is_composition = True is_composition = True
def __init__(self, **kwargs): def __init__(self, **kwargs):

View File

@@ -78,6 +78,7 @@ class MusicgenMelodyDecoderConfig(PretrainedConfig):
""" """
model_type = "musicgen_melody_decoder" model_type = "musicgen_melody_decoder"
base_config_key = "decoder_config"
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
def __init__( def __init__(
@@ -195,6 +196,11 @@ class MusicgenMelodyConfig(PretrainedConfig):
```""" ```"""
model_type = "musicgen_melody" model_type = "musicgen_melody"
sub_configs = {
"text_encoder": AutoConfig,
"audio_encoder": AutoConfig,
"decoder": MusicgenMelodyDecoderConfig,
}
is_composition = True is_composition = True
def __init__( def __init__(

View File

@@ -14,8 +14,7 @@
# limitations under the License. # limitations under the License.
"""OWLv2 model configuration""" """OWLv2 model configuration"""
import os from typing import TYPE_CHECKING, Dict
from typing import TYPE_CHECKING, Dict, Union
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -90,6 +89,7 @@ class Owlv2TextConfig(PretrainedConfig):
```""" ```"""
model_type = "owlv2_text_model" model_type = "owlv2_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -123,24 +123,6 @@ class Owlv2TextConfig(PretrainedConfig):
self.initializer_range = initializer_range self.initializer_range = initializer_range
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from Owlv2Config
if config_dict.get("model_type") == "owlv2":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2, 32->16 # Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2, 32->16
class Owlv2VisionConfig(PretrainedConfig): class Owlv2VisionConfig(PretrainedConfig):
@@ -197,6 +179,7 @@ class Owlv2VisionConfig(PretrainedConfig):
```""" ```"""
model_type = "owlv2_vision_model" model_type = "owlv2_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -229,24 +212,6 @@ class Owlv2VisionConfig(PretrainedConfig):
self.initializer_range = initializer_range self.initializer_range = initializer_range
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from Owlv2Config
if config_dict.get("model_type") == "owlv2":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2 # Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
class Owlv2Config(PretrainedConfig): class Owlv2Config(PretrainedConfig):
@@ -276,6 +241,7 @@ class Owlv2Config(PretrainedConfig):
""" """
model_type = "owlv2" model_type = "owlv2"
sub_configs = {"text_config": Owlv2TextConfig, "vision_config": Owlv2VisionConfig}
def __init__( def __init__(
self, self,
@@ -304,20 +270,6 @@ class Owlv2Config(PretrainedConfig):
self.return_dict = return_dict self.return_dict = return_dict
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@classmethod @classmethod
def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs): def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs):
r""" r"""

View File

@@ -14,9 +14,8 @@
# limitations under the License. # limitations under the License.
"""OWL-ViT model configuration""" """OWL-ViT model configuration"""
import os
from collections import OrderedDict from collections import OrderedDict
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Union from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -92,6 +91,7 @@ class OwlViTTextConfig(PretrainedConfig):
```""" ```"""
model_type = "owlvit_text_model" model_type = "owlvit_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -125,24 +125,6 @@ class OwlViTTextConfig(PretrainedConfig):
self.initializer_range = initializer_range self.initializer_range = initializer_range
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from OwlViTConfig
if config_dict.get("model_type") == "owlvit":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class OwlViTVisionConfig(PretrainedConfig): class OwlViTVisionConfig(PretrainedConfig):
r""" r"""
@@ -198,6 +180,7 @@ class OwlViTVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "owlvit_vision_model" model_type = "owlvit_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -230,24 +213,6 @@ class OwlViTVisionConfig(PretrainedConfig):
self.initializer_range = initializer_range self.initializer_range = initializer_range
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from OwlViTConfig
if config_dict.get("model_type") == "owlvit":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class OwlViTConfig(PretrainedConfig): class OwlViTConfig(PretrainedConfig):
r""" r"""
@@ -276,6 +241,7 @@ class OwlViTConfig(PretrainedConfig):
""" """
model_type = "owlvit" model_type = "owlvit"
sub_configs = {"text_config": OwlViTTextConfig, "vision_config": OwlViTVisionConfig}
def __init__( def __init__(
self, self,
@@ -304,20 +270,6 @@ class OwlViTConfig(PretrainedConfig):
self.return_dict = return_dict self.return_dict = return_dict
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
@classmethod @classmethod
def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs): def from_text_vision_configs(cls, text_config: Dict, vision_config: Dict, **kwargs):
r""" r"""

View File

@@ -17,7 +17,7 @@ import warnings
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -73,7 +73,7 @@ class PaliGemmaConfig(PretrainedConfig):
```""" ```"""
model_type = "paligemma" model_type = "paligemma"
is_composition = False sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -157,7 +157,7 @@ class Qwen2AudioConfig(PretrainedConfig):
```""" ```"""
model_type = "qwen2_audio" model_type = "qwen2_audio"
is_composition = False sub_configs = {"text_config": AutoConfig, "audio_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""Qwen2VL model configuration""" """Qwen2VL model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...modeling_rope_utils import rope_config_validation from ...modeling_rope_utils import rope_config_validation
from ...utils import logging from ...utils import logging
@@ -27,6 +24,7 @@ logger = logging.get_logger(__name__)
class Qwen2VLVisionConfig(PretrainedConfig): class Qwen2VLVisionConfig(PretrainedConfig):
model_type = "qwen2_vl" model_type = "qwen2_vl"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -55,23 +53,6 @@ class Qwen2VLVisionConfig(PretrainedConfig):
self.spatial_merge_size = spatial_merge_size self.spatial_merge_size = spatial_merge_size
self.temporal_patch_size = temporal_patch_size self.temporal_patch_size = temporal_patch_size
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if config_dict.get("model_type") == "qwen2_vl":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class Qwen2VLConfig(PretrainedConfig): class Qwen2VLConfig(PretrainedConfig):
r""" r"""
@@ -180,6 +161,7 @@ class Qwen2VLConfig(PretrainedConfig):
```""" ```"""
model_type = "qwen2_vl" model_type = "qwen2_vl"
sub_configs = {"vision_config": Qwen2VLVisionConfig}
keys_to_ignore_at_inference = ["past_key_values"] keys_to_ignore_at_inference = ["past_key_values"]
def __init__( def __init__(

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""Siglip model configuration""" """Siglip model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -79,6 +76,7 @@ class SiglipTextConfig(PretrainedConfig):
```""" ```"""
model_type = "siglip_text_model" model_type = "siglip_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -110,24 +108,6 @@ class SiglipTextConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.attention_dropout = attention_dropout self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from SiglipConfig
if config_dict.get("model_type") == "siglip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class SiglipVisionConfig(PretrainedConfig): class SiglipVisionConfig(PretrainedConfig):
r""" r"""
@@ -178,6 +158,7 @@ class SiglipVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "siglip_vision_model" model_type = "siglip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -206,24 +187,6 @@ class SiglipVisionConfig(PretrainedConfig):
self.layer_norm_eps = layer_norm_eps self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act self.hidden_act = hidden_act
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from SiglipConfig
if config_dict.get("model_type") == "siglip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class SiglipConfig(PretrainedConfig): class SiglipConfig(PretrainedConfig):
r""" r"""
@@ -268,6 +231,7 @@ class SiglipConfig(PretrainedConfig):
```""" ```"""
model_type = "siglip" model_type = "siglip"
sub_configs = {"text_config": SiglipTextConfig, "vision_config": SiglipVisionConfig}
def __init__(self, text_config=None, vision_config=None, **kwargs): def __init__(self, text_config=None, vision_config=None, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)

View File

@@ -71,6 +71,7 @@ class SpeechEncoderDecoderConfig(PretrainedConfig):
```""" ```"""
model_type = "speech-encoder-decoder" model_type = "speech-encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True is_composition = True
def __init__(self, **kwargs): def __init__(self, **kwargs):

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -78,7 +78,7 @@ class VideoLlavaConfig(PretrainedConfig):
```""" ```"""
model_type = "video_llava" model_type = "video_llava"
is_composition = False sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -15,7 +15,7 @@
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
from ..auto import CONFIG_MAPPING from ..auto import CONFIG_MAPPING, AutoConfig
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
@@ -72,7 +72,7 @@ class VipLlavaConfig(PretrainedConfig):
```""" ```"""
model_type = "vipllava" model_type = "vipllava"
is_composition = False sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
def __init__( def __init__(
self, self,

View File

@@ -78,6 +78,7 @@ class VisionEncoderDecoderConfig(PretrainedConfig):
```""" ```"""
model_type = "vision-encoder-decoder" model_type = "vision-encoder-decoder"
sub_configs = {"encoder": AutoConfig, "decoder": AutoConfig}
is_composition = True is_composition = True
def __init__(self, **kwargs): def __init__(self, **kwargs):

View File

@@ -75,6 +75,7 @@ class VisionTextDualEncoderConfig(PretrainedConfig):
```""" ```"""
model_type = "vision-text-dual-encoder" model_type = "vision-text-dual-encoder"
sub_configs = {"vision_config": AutoConfig, "text_config": AutoConfig}
is_composition = True is_composition = True
def __init__(self, projection_dim=512, logit_scale_init_value=2.6592, **kwargs): def __init__(self, projection_dim=512, logit_scale_init_value=2.6592, **kwargs):

View File

@@ -14,9 +14,6 @@
# limitations under the License. # limitations under the License.
"""X-CLIP model configuration""" """X-CLIP model configuration"""
import os
from typing import Union
from ...configuration_utils import PretrainedConfig from ...configuration_utils import PretrainedConfig
from ...utils import logging from ...utils import logging
@@ -79,6 +76,7 @@ class XCLIPTextConfig(PretrainedConfig):
```""" ```"""
model_type = "xclip_text_model" model_type = "xclip_text_model"
base_config_key = "text_config"
def __init__( def __init__(
self, self,
@@ -112,24 +110,6 @@ class XCLIPTextConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout self.attention_dropout = attention_dropout
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the text config dict if we are loading from XCLIPConfig
if config_dict.get("model_type") == "xclip":
config_dict = config_dict["text_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class XCLIPVisionConfig(PretrainedConfig): class XCLIPVisionConfig(PretrainedConfig):
r""" r"""
@@ -195,6 +175,7 @@ class XCLIPVisionConfig(PretrainedConfig):
```""" ```"""
model_type = "xclip_vision_model" model_type = "xclip_vision_model"
base_config_key = "vision_config"
def __init__( def __init__(
self, self,
@@ -239,24 +220,6 @@ class XCLIPVisionConfig(PretrainedConfig):
self.hidden_act = hidden_act self.hidden_act = hidden_act
self.drop_path_rate = drop_path_rate self.drop_path_rate = drop_path_rate
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
cls._set_token_in_kwargs(kwargs)
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
# get the vision config dict if we are loading from XCLIPConfig
if config_dict.get("model_type") == "xclip":
config_dict = config_dict["vision_config"]
if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
)
return cls.from_dict(config_dict, **kwargs)
class XCLIPConfig(PretrainedConfig): class XCLIPConfig(PretrainedConfig):
r""" r"""
@@ -295,6 +258,7 @@ class XCLIPConfig(PretrainedConfig):
""" """
model_type = "xclip" model_type = "xclip"
sub_configs = {"text_config": XCLIPTextConfig, "vision_config": XCLIPVisionConfig}
def __init__( def __init__(
self, self,

View File

@@ -457,11 +457,20 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = AlignModelTester(self) self.model_tester = AlignModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=AlignConfig,
has_text_modality=False,
common_properties=["projection_dim", "temperature_init_value"],
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Start to fail after using torch `cu118`.") @unittest.skip(reason="Start to fail after using torch `cu118`.")
def test_multi_gpu_data_parallel_forward(self): def test_multi_gpu_data_parallel_forward(self):
super().test_multi_gpu_data_parallel_forward() super().test_multi_gpu_data_parallel_forward()

View File

@@ -452,11 +452,20 @@ class AltCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self): def setUp(self):
self.model_tester = AltCLIPModelTester(self) self.model_tester = AltCLIPModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=AltCLIPConfig,
has_text_modality=False,
common_properties=["projection_dim", "logit_scale_init_value"],
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests") @unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -449,11 +449,18 @@ class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = BlipModelTester(self) self.model_tester = BlipModelTester(self)
common_properties = ["logit_scale_init_value", "image_text_hidden_size", "projection_dim", "label_smoothing"]
self.config_tester = ConfigTester(
self, config_class=BlipConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests") @unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -482,6 +482,13 @@ class Blip2ForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationT
def setUp(self): def setUp(self):
self.model_tester = Blip2ForConditionalGenerationDecoderOnlyModelTester(self) self.model_tester = Blip2ForConditionalGenerationDecoderOnlyModelTester(self)
common_properties = ["image_token_index", "num_query_tokens", "image_text_hidden_size"]
self.config_tester = ConfigTester(
self, config_class=Blip2Config, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_for_conditional_generation(self): def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -515,11 +515,18 @@ class ClapModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = ClapModelTester(self) self.model_tester = ClapModelTester(self)
common_properties = ["logit_scale_init_value", "projection_hidden_act", "projection_dim"]
self.config_tester = ConfigTester(
self, config_class=ClapConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests") @unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -745,11 +745,18 @@ class CLIPModelTest(CLIPModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def setUp(self): def setUp(self):
self.model_tester = CLIPModelTester(self) self.model_tester = CLIPModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=CLIPConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests") @unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -472,11 +472,18 @@ class CLIPSegModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self): def setUp(self):
self.model_tester = CLIPSegModelTester(self) self.model_tester = CLIPSegModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=CLIPSegConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
def test_model_for_image_segmentation(self): def test_model_for_image_segmentation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model_for_image_segmentation(*config_and_inputs) self.model_tester.create_and_check_model_for_image_segmentation(*config_and_inputs)

View File

@@ -414,7 +414,13 @@ class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase)
def setUp(self): def setUp(self):
self.model_tester = ClvpModelForConditionalGenerationTester(self) self.model_tester = ClvpModelForConditionalGenerationTester(self)
self.clvp_config_tester = ConfigTester(self, config_class=ClvpConfig, hidden_size=32) common_properties = ["projection_dim", "logit_scale_init_value"]
self.clvp_config_tester = ConfigTester(
self, config_class=ClvpConfig, has_text_modality=False, common_properties=common_properties, hidden_size=32
)
def test_config(self):
self.clvp_config_tester.run_common_tests()
def tearDown(self): def tearDown(self):
super().tearDown() super().tearDown()

View File

@@ -931,11 +931,18 @@ class FlavaModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = self.class_for_tester(self) self.model_tester = self.class_for_tester(self)
common_properties = ["projection_dim", "logit_scale_init_value", "init_codebook"]
self.config_tester = ConfigTester(
self, config_class=FlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="tested in individual model tests") @unittest.skip(reason="tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -559,11 +559,18 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def setUp(self): def setUp(self):
self.model_tester = GroupViTModelTester(self) self.model_tester = GroupViTModelTester(self)
common_properties = ["projection_dim", "projection_intermediate_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=GroupViTConfig, has_text_modality=False, common_properties=common_properties
)
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_model(*config_and_inputs) self.model_tester.create_and_check_model(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="hidden_states are tested in individual model tests") @unittest.skip(reason="hidden_states are tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -185,7 +185,12 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = Idefics2VisionText2TextModelTester(self) self.model_tester = Idefics2VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False) self.config_tester = ConfigTester(
self, config_class=Idefics2Config, has_text_modality=False, common_properties=["image_token_id"]
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="input_embeds cannot be passed in without input_ids") @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds(): def test_inputs_embeds():

View File

@@ -168,7 +168,12 @@ class Idefics3ModelTest(ModelTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = Idefics3VisionText2TextModelTester(self) self.model_tester = Idefics3VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics3Config, has_text_modality=False) self.config_tester = ConfigTester(
self, config_class=Idefics3Config, has_text_modality=False, common_properties=["image_token_id"]
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="input_embeds cannot be passed in without input_ids") @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds(): def test_inputs_embeds():

View File

@@ -486,6 +486,15 @@ class InstructBlipForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, Gene
def setUp(self): def setUp(self):
self.model_tester = InstructBlipForConditionalGenerationDecoderOnlyModelTester(self) self.model_tester = InstructBlipForConditionalGenerationDecoderOnlyModelTester(self)
self.config_tester = ConfigTester(
self,
config_class=InstructBlipConfig,
has_text_modality=False,
common_properties=["num_query_tokens", "image_token_index"],
)
def test_config(self):
self.config_tester.run_common_tests()
def test_for_conditional_generation(self): def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -510,11 +510,18 @@ class InstructBlipVideoForConditionalGenerationDecoderOnlyTest(
def setUp(self): def setUp(self):
self.model_tester = InstructBlipVideoForConditionalGenerationDecoderOnlyModelTester(self) self.model_tester = InstructBlipVideoForConditionalGenerationDecoderOnlyModelTester(self)
common_properties = ["num_query_tokens", "video_token_index"]
self.config_tester = ConfigTester(
self, config_class=InstructBlipVideoConfig, has_text_modality=False, common_properties=common_properties
)
def test_for_conditional_generation(self): def test_for_conditional_generation(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_for_conditional_generation(*config_and_inputs) self.model_tester.create_and_check_for_conditional_generation(*config_and_inputs)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip(reason="Hidden_states is tested in individual model tests") @unittest.skip(reason="Hidden_states is tested in individual model tests")
def test_hidden_states_output(self): def test_hidden_states_output(self):
pass pass

View File

@@ -304,7 +304,12 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def setUp(self): def setUp(self):
self.model_tester = Kosmos2ModelTester(self) self.model_tester = Kosmos2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=Kosmos2Config, hidden_size=37) self.config_tester = ConfigTester(
self, config_class=Kosmos2Config, has_text_modality=False, common_properties=["latent_query_num"]
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite from common to skip `image_to_text_projection.latent_query` # overwrite from common to skip `image_to_text_projection.latent_query`
def test_initialization(self): def test_initialization(self):

View File

@@ -194,7 +194,13 @@ class LlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterM
def setUp(self): def setUp(self):
self.model_tester = LlavaVisionText2TextModelTester(self) self.model_tester = LlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaConfig, has_text_modality=False) common_properties = ["image_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs # overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self): def test_inputs_embeds(self):

View File

@@ -223,7 +223,13 @@ class LlavaNextForConditionalGenerationModelTest(ModelTesterMixin, GenerationTes
def setUp(self): def setUp(self):
self.model_tester = LlavaNextVisionText2TextModelTester(self) self.model_tester = LlavaNextVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaNextConfig, has_text_modality=False) common_properties = ["image_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaNextConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self): def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -240,7 +240,13 @@ class LlavaNextVideoForConditionalGenerationModelTest(ModelTesterMixin, Generati
def setUp(self): def setUp(self):
self.model_tester = LlavaNextVideoVisionText2TextModelTester(self) self.model_tester = LlavaNextVideoVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaNextVideoConfig, has_text_modality=False) common_properties = ["image_token_index", "video_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=LlavaNextVideoConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self): def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -226,7 +226,13 @@ class LlavaOnevisionForConditionalGenerationModelTest(ModelTesterMixin, Generati
def setUp(self): def setUp(self):
self.model_tester = LlavaOnevisionVisionText2TextModelTester(self) self.model_tester = LlavaOnevisionVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=LlavaOnevisionConfig, has_text_modality=False) common_properties = ["image_token_index", "video_token_index", "vision_feature_layer"]
self.config_tester = ConfigTester(
self, config_class=LlavaOnevisionConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self): def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -272,7 +272,12 @@ class MllamaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTester
def setUp(self): def setUp(self):
self.model_tester = MllamaVisionText2TextModelTester(self) self.model_tester = MllamaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=MllamaConfig, has_text_modality=False) self.config_tester = ConfigTester(
self, config_class=MllamaConfig, has_text_modality=False, common_properties=["image_token_index"]
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs # overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self): def test_inputs_embeds(self):

View File

@@ -447,6 +447,13 @@ class Owlv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = Owlv2ModelTester(self) self.model_tester = Owlv2ModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=Owlv2Config, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -442,6 +442,13 @@ class OwlViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = OwlViTModelTester(self) self.model_tester = OwlViTModelTester(self)
common_properties = ["projection_dim", "logit_scale_init_value"]
self.config_tester = ConfigTester(
self, config_class=OwlViTConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -232,6 +232,9 @@ class Qwen2VLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas
self.model_tester = Qwen2VLVisionText2TextModelTester(self) self.model_tester = Qwen2VLVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Qwen2VLConfig, has_text_modality=False) self.config_tester = ConfigTester(self, config_class=Qwen2VLConfig, has_text_modality=False)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization(self): def test_initialization(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -667,9 +667,12 @@ class SiglipModelTest(SiglipModelTesterMixin, PipelineTesterMixin, unittest.Test
test_disk_offload_bin = False test_disk_offload_bin = False
_is_composite = True _is_composite = True
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.setUp with CLIP->Siglip
def setUp(self): def setUp(self):
self.model_tester = SiglipModelTester(self) self.model_tester = SiglipModelTester(self)
self.config_tester = ConfigTester(self, config_class=SiglipConfig, has_text_modality=False)
def test_config(self):
self.config_tester.run_common_tests()
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.test_model # Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.test_model
def test_model(self): def test_model(self):

View File

@@ -217,7 +217,13 @@ class VideoLlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTe
def setUp(self): def setUp(self):
self.model_tester = VideoLlavaVisionText2TextModelTester(self) self.model_tester = VideoLlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=VideoLlavaConfig, has_text_modality=False) common_properties = ["image_token_index", "video_token_index", "vision_feature_layer", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=VideoLlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
@unittest.skip( @unittest.skip(
reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124" reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"

View File

@@ -179,7 +179,13 @@ class VipLlavaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTest
def setUp(self): def setUp(self):
self.model_tester = VipLlavaVisionText2TextModelTester(self) self.model_tester = VipLlavaVisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=VipLlavaConfig, has_text_modality=False) common_properties = ["image_token_index", "vision_feature_layers", "image_seq_length"]
self.config_tester = ConfigTester(
self, config_class=VipLlavaConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
# overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs # overwrite inputs_embeds tests because we need to delete "pixel values" for LVLMs
def test_inputs_embeds(self): def test_inputs_embeds(self):

View File

@@ -547,6 +547,13 @@ class XCLIPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def setUp(self): def setUp(self):
self.model_tester = XCLIPModelTester(self) self.model_tester = XCLIPModelTester(self)
common_properties = ["projection_dim", "prompt_layers", "prompt_num_attention_heads"]
self.config_tester = ConfigTester(
self, config_class=XCLIPConfig, has_text_modality=False, common_properties=common_properties
)
def test_config(self):
self.config_tester.run_common_tests()
def test_model(self): def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()

View File

@@ -17,12 +17,17 @@ import copy
import json import json
import os import os
import tempfile import tempfile
from pathlib import Path
from transformers import is_torch_available from transformers import is_torch_available
from transformers.utils import direct_transformers_import
from .utils.test_configuration_utils import config_common_kwargs from .utils.test_configuration_utils import config_common_kwargs
transformers_module = direct_transformers_import(Path(__file__).parent)
class ConfigTester: class ConfigTester:
def __init__(self, parent, config_class=None, has_text_modality=True, common_properties=None, **kwargs): def __init__(self, parent, config_class=None, has_text_modality=True, common_properties=None, **kwargs):
self.parent = parent self.parent = parent
@@ -35,9 +40,10 @@ class ConfigTester:
config = self.config_class(**self.inputs_dict) config = self.config_class(**self.inputs_dict)
common_properties = ( common_properties = (
["hidden_size", "num_attention_heads", "num_hidden_layers"] ["hidden_size", "num_attention_heads", "num_hidden_layers"]
if self.common_properties is None if self.common_properties is None and not self.config_class.sub_configs
else self.common_properties else self.common_properties
) )
common_properties = [] if common_properties is None else common_properties
# Add common fields for text models # Add common fields for text models
if self.has_text_modality: if self.has_text_modality:
@@ -110,6 +116,44 @@ class ConfigTester:
self.parent.assertEqual(config_second.to_dict(), config_first.to_dict()) self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())
def create_and_test_config_from_and_save_pretrained_composite(self):
    """
    Tests that composite or nested configs can be loaded and saved correctly. In case the config
    has a sub-config, we should be able to call `sub_config.from_pretrained('general_config_file')`
    and get a result same as if we loaded the whole config and obtained `config.sub_config` from it.

    When `config_class.sub_configs` is empty the loop body never runs, so only the save/load
    round trip of the general config is exercised.
    """
    config = self.config_class(**self.inputs_dict)

    with tempfile.TemporaryDirectory() as tmpdirname:
        config.save_pretrained(tmpdirname)
        general_config_loaded = self.config_class.from_pretrained(tmpdirname)
        general_config_dict = config.to_dict()

        # Iterate over all sub_configs if there are any and load them with their own classes
        for sub_config_key, sub_class in self.config_class.sub_configs.items():
            expected_sub_config_dict = general_config_dict[sub_config_key]

            # When the declared class is `AutoConfig`, resolve the concrete config class from the
            # sub-config dict first; loading from the general config directory is the same either way
            if sub_class.__name__ == "AutoConfig":
                sub_class = sub_class.for_model(**expected_sub_config_dict).__class__
            sub_config_loaded = sub_class.from_pretrained(tmpdirname)

            # Pop `transformers_version`, it never exists when a config is part of a general composite config
            # Verify that loading with subconfig class results in same dict as if we loaded with general composite config class
            sub_config_loaded_dict = sub_config_loaded.to_dict()
            sub_config_loaded_dict.pop("transformers_version", None)
            self.parent.assertEqual(sub_config_loaded_dict, expected_sub_config_dict)

            # Verify that the loaded config type is same as in the general config
            type_from_general_config = type(getattr(general_config_loaded, sub_config_key))
            self.parent.assertIsInstance(sub_config_loaded, type_from_general_config)

            # Now save only the sub-config and load it back to make sure the whole load-save-load pipeline works
            with tempfile.TemporaryDirectory() as tmpdirname2:
                sub_config_loaded.save_pretrained(tmpdirname2)
                sub_config_loaded_2 = sub_class.from_pretrained(tmpdirname2)
                self.parent.assertEqual(sub_config_loaded.to_dict(), sub_config_loaded_2.to_dict())
def create_and_test_config_with_num_labels(self): def create_and_test_config_with_num_labels(self):
config = self.config_class(**self.inputs_dict, num_labels=5) config = self.config_class(**self.inputs_dict, num_labels=5)
self.parent.assertEqual(len(config.id2label), 5) self.parent.assertEqual(len(config.id2label), 5)
@@ -128,6 +172,9 @@ class ConfigTester:
self.parent.assertIsNotNone(config) self.parent.assertIsNotNone(config)
def check_config_arguments_init(self): def check_config_arguments_init(self):
if self.config_class.sub_configs:
return # TODO: @raushan composite models are not consistent in how they set general params
kwargs = copy.deepcopy(config_common_kwargs) kwargs = copy.deepcopy(config_common_kwargs)
config = self.config_class(**kwargs) config = self.config_class(**kwargs)
wrong_values = [] wrong_values = []
@@ -153,6 +200,7 @@ class ConfigTester:
self.create_and_test_config_to_json_file() self.create_and_test_config_to_json_file()
self.create_and_test_config_from_and_save_pretrained() self.create_and_test_config_from_and_save_pretrained()
self.create_and_test_config_from_and_save_pretrained_subfolder() self.create_and_test_config_from_and_save_pretrained_subfolder()
self.create_and_test_config_from_and_save_pretrained_composite()
self.create_and_test_config_with_num_labels() self.create_and_test_config_with_num_labels()
self.check_config_can_be_init_without_params() self.check_config_can_be_init_without_params()
self.check_config_arguments_init() self.check_config_arguments_init()

View File

@@ -3802,22 +3802,18 @@ class ModelTesterMixin:
self.skipTest("Model is not a composite model.") self.skipTest("Model is not a composite model.")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
sub_configs = {
key: getattr(config, key) for key in config if isinstance(getattr(config, key), PretrainedConfig)
}
# set eager as it will be the one supported in all models # set eager as it will be the one supported in all models
# we just need to test if passing 'attn_implementation' as a dict fails or not # we just need to test if passing 'attn_implementation' as a dict fails or not
attn_implementation_per_subconfig = {} attn_implementation_per_subconfig = {}
for key, sub_config in sub_configs.items(): for key in config.sub_configs.keys():
attn_implementation_per_subconfig[key] = "eager" attn_implementation_per_subconfig[key] = "eager"
config._attn_implementation = attn_implementation_per_subconfig config._attn_implementation = attn_implementation_per_subconfig
model = model_class(config) model = model_class(config)
for key in model.config: for key in config.sub_configs.keys():
if isinstance(getattr(model.config, key), PretrainedConfig): sub_config = getattr(model.config, key)
sub_config = getattr(model.config, key) self.assertTrue(sub_config._attn_implementation == "eager")
self.assertTrue(sub_config._attn_implementation == "eager")
for name, submodule in model.named_modules(): for name, submodule in model.named_modules():
class_name = submodule.__class__.__name__ class_name = submodule.__class__.__name__
@@ -3826,7 +3822,7 @@ class ModelTesterMixin:
or "SdpaSelfAttention" in class_name or "SdpaSelfAttention" in class_name
or "FlashAttention" in class_name or "FlashAttention" in class_name
): ):
raise ValueError("The eager model should not have SDPA/FA2 attention layers") raise ValueError(f"The eager model should not have SDPA/FA2 attention layers but got {class_name}")
@require_torch_sdpa @require_torch_sdpa
def test_sdpa_can_dispatch_non_composite_models(self): def test_sdpa_can_dispatch_non_composite_models(self):