From b56d721397028a562b1e6444a8e493171e659202 Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Mon, 28 Jul 2025 12:38:48 +0200 Subject: [PATCH] [configuration] remove redundant `classmethod` (#38812) * remove redundant classmethod * warning message, add space between words * fix tests * fix copies --- src/transformers/configuration_utils.py | 36 +++++++++++++++++++ .../models/aimv2/configuration_aimv2.py | 12 ------- .../models/align/configuration_align.py | 12 ------- .../models/altclip/configuration_altclip.py | 12 ------- .../models/blip/configuration_blip.py | 12 ------- .../bridgetower/configuration_bridgetower.py | 11 ------ .../configuration_chinese_clip.py | 12 ------- .../models/clap/configuration_clap.py | 12 ------- .../models/clip/configuration_clip.py | 12 ------- .../models/clipseg/configuration_clipseg.py | 12 ------- .../models/groupvit/configuration_groupvit.py | 12 ------- .../pix2struct/configuration_pix2struct.py | 14 -------- .../models/siglip/configuration_siglip.py | 12 ------- .../models/siglip2/configuration_siglip2.py | 12 ------- .../models/x_clip/configuration_x_clip.py | 12 ------- tests/models/align/test_modeling_align.py | 6 ++-- tests/models/altclip/test_modeling_altclip.py | 6 ++-- tests/models/blip/test_modeling_blip.py | 24 ++++++++----- .../bridgetower/test_modeling_bridgetower.py | 6 ++-- .../test_modeling_chinese_clip.py | 6 ++-- tests/models/clap/test_modeling_clap.py | 6 ++-- tests/models/clip/test_modeling_clip.py | 6 ++-- tests/models/clipseg/test_modeling_clipseg.py | 6 ++-- .../test_modeling_efficientloftr.py | 2 +- .../models/groupvit/test_modeling_groupvit.py | 6 ++-- tests/models/owlv2/test_modeling_owlv2.py | 12 +++++-- tests/models/owlvit/test_modeling_owlvit.py | 12 +++++-- .../pix2struct/test_modeling_pix2struct.py | 6 +++- tests/models/siglip/test_modeling_siglip.py | 6 ++-- tests/models/siglip2/test_modeling_siglip2.py | 6 ++-- tests/models/x_clip/test_modeling_x_clip.py | 6 ++-- 31 files changed, 117 insertions(+), 210 deletions(-) diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 8abdb94c96..acfd450e87 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -1199,6 +1199,42 @@ class PretrainedConfig(PushToHubMixin): config_to_return = self return config_to_return + @classmethod + def from_text_vision_configs(cls, text_config, vision_config, **kwargs): + r""" + Instantiate a model config (or a derived class) from text model configuration and vision model + configuration. + + Returns: + [`PreTrainedConfig`]: An instance of a configuration object + """ + + warnings.warn( + "The `from_text_vision_configs` method is deprecated and will be removed in v4.60 of Transformers. Please instantiate " + "the config class directly with `MyConfig(text_config=text_config, vision_config=vision_config, **kwargs)` instead.", + FutureWarning, + ) + + return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) + + @classmethod + def from_text_audio_configs(cls, text_config, audio_config, **kwargs): + r""" + Instantiate a model config (or a derived class) from text model configuration and audio model + configuration. + + Returns: + [`PreTrainedConfig`]: An instance of a configuration object + """ + + warnings.warn( + "The `from_text_audio_configs` method is deprecated and will be removed in v4.60 of Transformers. Please instantiate " + "the config class directly with `MyConfig(text_config=text_config, audio_config=audio_config, **kwargs)` instead.", + FutureWarning, + ) + + return cls(text_config=text_config.to_dict(), audio_config=audio_config.to_dict(), **kwargs) + def get_configuration_file(configuration_files: list[str]) -> str: """ diff --git a/src/transformers/models/aimv2/configuration_aimv2.py b/src/transformers/models/aimv2/configuration_aimv2.py index bf0064ad9f..adab18c744 100644 --- a/src/transformers/models/aimv2/configuration_aimv2.py +++ b/src/transformers/models/aimv2/configuration_aimv2.py @@ -280,17 +280,5 @@ class Aimv2Config(PretrainedConfig): self.logit_scale_init_value = logit_scale_init_value self.max_logit_scale = 100.0 - @classmethod - def from_text_vision_configs(cls, text_config: Aimv2TextConfig, vision_config: Aimv2VisionConfig, **kwargs): - r""" - Instantiate a [`Aimv2Config`] (or a derived class) from aimv2 text model configuration and aimv2 vision - model configuration. - - Returns: - [`Aimv2Config`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["Aimv2Config", "Aimv2VisionConfig", "Aimv2TextConfig"] diff --git a/src/transformers/models/align/configuration_align.py b/src/transformers/models/align/configuration_align.py index 76193bf7a2..b924d85a6c 100644 --- a/src/transformers/models/align/configuration_align.py +++ b/src/transformers/models/align/configuration_align.py @@ -327,17 +327,5 @@ class AlignConfig(PretrainedConfig): self.temperature_init_value = temperature_init_value self.initializer_range = initializer_range - @classmethod - def from_text_vision_configs(cls, text_config: AlignTextConfig, vision_config: AlignVisionConfig, **kwargs): - r""" - Instantiate a [`AlignConfig`] (or a derived class) from align text model configuration and align vision model - configuration. - - Returns: - [`AlignConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["AlignTextConfig", "AlignVisionConfig", "AlignConfig"] diff --git a/src/transformers/models/altclip/configuration_altclip.py b/src/transformers/models/altclip/configuration_altclip.py index 0a07373edd..5e8c0f2a26 100755 --- a/src/transformers/models/altclip/configuration_altclip.py +++ b/src/transformers/models/altclip/configuration_altclip.py @@ -368,17 +368,5 @@ class AltCLIPConfig(PretrainedConfig): self.logit_scale_init_value = logit_scale_init_value self.initializer_factor = 1.0 - @classmethod - def from_text_vision_configs(cls, text_config: AltCLIPTextConfig, vision_config: AltCLIPVisionConfig, **kwargs): - r""" - Instantiate a [`AltCLIPConfig`] (or a derived class) from altclip text model configuration and altclip vision - model configuration. - - Returns: - [`AltCLIPConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["AltCLIPTextConfig", "AltCLIPVisionConfig", "AltCLIPConfig"] diff --git a/src/transformers/models/blip/configuration_blip.py b/src/transformers/models/blip/configuration_blip.py index 35dde07367..6e0a5590c3 100644 --- a/src/transformers/models/blip/configuration_blip.py +++ b/src/transformers/models/blip/configuration_blip.py @@ -313,17 +313,5 @@ class BlipConfig(PretrainedConfig): self.image_text_hidden_size = image_text_hidden_size self.label_smoothing = label_smoothing - @classmethod - def from_text_vision_configs(cls, text_config: BlipTextConfig, vision_config: BlipVisionConfig, **kwargs): - r""" - Instantiate a [`BlipConfig`] (or a derived class) from blip text model configuration and blip vision model - configuration. - - Returns: - [`BlipConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["BlipConfig", "BlipTextConfig", "BlipVisionConfig"] diff --git a/src/transformers/models/bridgetower/configuration_bridgetower.py b/src/transformers/models/bridgetower/configuration_bridgetower.py index 72881b22c3..4c84b0a294 100644 --- a/src/transformers/models/bridgetower/configuration_bridgetower.py +++ b/src/transformers/models/bridgetower/configuration_bridgetower.py @@ -304,16 +304,5 @@ class BridgeTowerConfig(PretrainedConfig): self.text_config = BridgeTowerTextConfig(**text_config) self.vision_config = BridgeTowerVisionConfig(**vision_config) - @classmethod - def from_text_vision_configs( - cls, text_config: BridgeTowerTextConfig, vision_config: BridgeTowerVisionConfig, **kwargs - ): - r""" - Instantiate a [`BridgeTowerConfig`] (or a derived class) from BridgeTower text model configuration. Returns: - [`BridgeTowerConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["BridgeTowerConfig", "BridgeTowerTextConfig", "BridgeTowerVisionConfig"] diff --git a/src/transformers/models/chinese_clip/configuration_chinese_clip.py b/src/transformers/models/chinese_clip/configuration_chinese_clip.py index e47509a364..e7c98d0d2d 100644 --- a/src/transformers/models/chinese_clip/configuration_chinese_clip.py +++ b/src/transformers/models/chinese_clip/configuration_chinese_clip.py @@ -373,18 +373,6 @@ class ChineseCLIPConfig(PretrainedConfig): self.initializer_factor = 1.0 self.initializer_range = 0.02 - @classmethod - def from_text_vision_configs( - cls, text_config: ChineseCLIPTextConfig, vision_config: ChineseCLIPVisionConfig, **kwargs - ): - r""" - Instantiate a [`ChineseCLIPConfig`] (or a derived class) from Chinese-CLIP text model configuration and - Chinese-CLIP vision model configuration. Returns: - [`ChineseCLIPConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - class ChineseCLIPOnnxConfig(OnnxConfig): @property diff --git a/src/transformers/models/clap/configuration_clap.py b/src/transformers/models/clap/configuration_clap.py index 1ee09d08a6..900e8d373f 100644 --- a/src/transformers/models/clap/configuration_clap.py +++ b/src/transformers/models/clap/configuration_clap.py @@ -378,17 +378,5 @@ class ClapConfig(PretrainedConfig): self.initializer_factor = initializer_factor self.num_hidden_layers = self.text_config.num_hidden_layers + len(self.audio_config.depths) - @classmethod - def from_text_audio_configs(cls, text_config: ClapTextConfig, audio_config: ClapAudioConfig, **kwargs): - r""" - Instantiate a [`ClapConfig`] (or a derived class) from clap text model configuration and clap audio model - configuration. - - Returns: - [`ClapConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), audio_config=audio_config.to_dict(), **kwargs) - __all__ = ["ClapAudioConfig", "ClapConfig", "ClapTextConfig"] diff --git a/src/transformers/models/clip/configuration_clip.py b/src/transformers/models/clip/configuration_clip.py index 0626a7334f..0b4fe6ba37 100644 --- a/src/transformers/models/clip/configuration_clip.py +++ b/src/transformers/models/clip/configuration_clip.py @@ -361,18 +361,6 @@ class CLIPConfig(PretrainedConfig): self.logit_scale_init_value = logit_scale_init_value self.initializer_factor = 1.0 - @classmethod - def from_text_vision_configs(cls, text_config: CLIPTextConfig, vision_config: CLIPVisionConfig, **kwargs): - r""" - Instantiate a [`CLIPConfig`] (or a derived class) from clip text model configuration and clip vision model - configuration. - - Returns: - [`CLIPConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - class CLIPOnnxConfig(OnnxConfig): @property diff --git a/src/transformers/models/clipseg/configuration_clipseg.py b/src/transformers/models/clipseg/configuration_clipseg.py index 55c29c2b7d..60b14eb7ef 100644 --- a/src/transformers/models/clipseg/configuration_clipseg.py +++ b/src/transformers/models/clipseg/configuration_clipseg.py @@ -380,17 +380,5 @@ class CLIPSegConfig(PretrainedConfig): self.initializer_factor = 1.0 self.use_complex_transposed_convolution = use_complex_transposed_convolution - @classmethod - def from_text_vision_configs(cls, text_config: CLIPSegTextConfig, vision_config: CLIPSegVisionConfig, **kwargs): - r""" - Instantiate a [`CLIPSegConfig`] (or a derived class) from clipseg text model configuration and clipseg vision - model configuration. - - Returns: - [`CLIPSegConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["CLIPSegConfig", "CLIPSegTextConfig", "CLIPSegVisionConfig"] diff --git a/src/transformers/models/groupvit/configuration_groupvit.py b/src/transformers/models/groupvit/configuration_groupvit.py index ffe8aa5500..d17288ede7 100644 --- a/src/transformers/models/groupvit/configuration_groupvit.py +++ b/src/transformers/models/groupvit/configuration_groupvit.py @@ -357,18 +357,6 @@ class GroupViTConfig(PretrainedConfig): self.initializer_factor = 1.0 self.output_segmentation = False - @classmethod - def from_text_vision_configs(cls, text_config: GroupViTTextConfig, vision_config: GroupViTVisionConfig, **kwargs): - r""" - Instantiate a [`GroupViTConfig`] (or a derived class) from groupvit text model configuration and groupvit - vision model configuration. - - Returns: - [`GroupViTConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - class GroupViTOnnxConfig(OnnxConfig): @property diff --git a/src/transformers/models/pix2struct/configuration_pix2struct.py b/src/transformers/models/pix2struct/configuration_pix2struct.py index db2f0ff7e3..be2110022f 100644 --- a/src/transformers/models/pix2struct/configuration_pix2struct.py +++ b/src/transformers/models/pix2struct/configuration_pix2struct.py @@ -332,19 +332,5 @@ class Pix2StructConfig(PretrainedConfig): self.is_vqa = is_vqa - @classmethod - def from_text_vision_configs( - cls, text_config: Pix2StructTextConfig, vision_config: Pix2StructVisionConfig, **kwargs - ): - r""" - Instantiate a [`Pix2StructConfig`] (or a derived class) from pix2struct text model configuration and pix2struct - vision model configuration. - - Returns: - [`Pix2StructConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["Pix2StructConfig", "Pix2StructTextConfig", "Pix2StructVisionConfig"] diff --git a/src/transformers/models/siglip/configuration_siglip.py b/src/transformers/models/siglip/configuration_siglip.py index f4a140cecc..0c182014fa 100644 --- a/src/transformers/models/siglip/configuration_siglip.py +++ b/src/transformers/models/siglip/configuration_siglip.py @@ -253,17 +253,5 @@ class SiglipConfig(PretrainedConfig): self.initializer_factor = 1.0 - @classmethod - def from_text_vision_configs(cls, text_config: SiglipTextConfig, vision_config: SiglipVisionConfig, **kwargs): - r""" - Instantiate a [`SiglipConfig`] (or a derived class) from siglip text model configuration and siglip vision - model configuration. - - Returns: - [`SiglipConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["SiglipConfig", "SiglipTextConfig", "SiglipVisionConfig"] diff --git a/src/transformers/models/siglip2/configuration_siglip2.py b/src/transformers/models/siglip2/configuration_siglip2.py index 6cb379c670..67ef9df8f4 100644 --- a/src/transformers/models/siglip2/configuration_siglip2.py +++ b/src/transformers/models/siglip2/configuration_siglip2.py @@ -261,17 +261,5 @@ class Siglip2Config(PretrainedConfig): self.initializer_factor = 1.0 - @classmethod - def from_text_vision_configs(cls, text_config: Siglip2TextConfig, vision_config: Siglip2VisionConfig, **kwargs): - r""" - Instantiate a [`Siglip2Config`] (or a derived class) from siglip2 text model configuration and siglip2 vision - model configuration. - - Returns: - [`Siglip2Config`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["Siglip2Config", "Siglip2TextConfig", "Siglip2VisionConfig"] diff --git a/src/transformers/models/x_clip/configuration_x_clip.py b/src/transformers/models/x_clip/configuration_x_clip.py index 310ca960c3..66db819168 100644 --- a/src/transformers/models/x_clip/configuration_x_clip.py +++ b/src/transformers/models/x_clip/configuration_x_clip.py @@ -365,17 +365,5 @@ class XCLIPConfig(PretrainedConfig): self.logit_scale_init_value = logit_scale_init_value self.initializer_factor = 1.0 - @classmethod - def from_text_vision_configs(cls, text_config: XCLIPTextConfig, vision_config: XCLIPVisionConfig, **kwargs): - r""" - Instantiate a [`XCLIPConfig`] (or a derived class) from xclip text model configuration and xclip vision model - configuration. - - Returns: - [`XCLIPConfig`]: An instance of a configuration object - """ - - return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) - __all__ = ["XCLIPConfig", "XCLIPTextConfig", "XCLIPVisionConfig"] diff --git a/tests/models/align/test_modeling_align.py b/tests/models/align/test_modeling_align.py index 15c520d1d2..4e897a554c 100644 --- a/tests/models/align/test_modeling_align.py +++ b/tests/models/align/test_modeling_align.py @@ -408,8 +408,10 @@ class AlignModelTester: return config, input_ids, token_type_ids, input_mask, pixel_values def get_config(self): - return AlignConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return AlignConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values): diff --git a/tests/models/altclip/test_modeling_altclip.py b/tests/models/altclip/test_modeling_altclip.py index d56f6326ac..d1656d06fe 100755 --- a/tests/models/altclip/test_modeling_altclip.py +++ b/tests/models/altclip/test_modeling_altclip.py @@ -376,8 +376,10 @@ class AltCLIPModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return AltCLIPConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return AltCLIPConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index 851177b01a..d340d3e569 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -381,8 +381,10 @@ class BlipModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return BlipConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return BlipConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): @@ -664,8 +666,10 @@ class BlipTextRetrievalModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return BlipConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return BlipConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): @@ -713,8 +717,10 @@ class BlipTextImageModelsModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return BlipConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return BlipConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): @@ -761,8 +767,10 @@ class BlipVQAModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return BlipConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return BlipConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index c59e7f4e14..b0c26f1e7d 100644 --- a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -203,9 +203,9 @@ class BridgeTowerModelTester: return (config, input_ids, attention_mask, pixel_values, pixel_mask) def get_config(self): - return BridgeTowerConfig.from_text_vision_configs( - text_config=self.text_model_tester.get_config(), - vision_config=self.vision_model_tester.get_config(), + return BridgeTowerConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), share_cross_modal_transformer_layers=self.share_cross_modal_transformer_layers, share_link_tower_layers=self.share_link_tower_layers, link_tower_type=self.link_tower_type, diff --git a/tests/models/chinese_clip/test_modeling_chinese_clip.py b/tests/models/chinese_clip/test_modeling_chinese_clip.py index 520ff2af3d..7ec9dd42dc 100644 --- a/tests/models/chinese_clip/test_modeling_chinese_clip.py +++ b/tests/models/chinese_clip/test_modeling_chinese_clip.py @@ -515,8 +515,10 @@ class ChineseCLIPModelTester: return config, input_ids, token_type_ids, attention_mask, pixel_values def get_config(self): - return ChineseCLIPConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return ChineseCLIPConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values): diff --git a/tests/models/clap/test_modeling_clap.py b/tests/models/clap/test_modeling_clap.py index 24a28e80bd..22579fa99f 100644 --- a/tests/models/clap/test_modeling_clap.py +++ b/tests/models/clap/test_modeling_clap.py @@ -459,8 +459,10 @@ class ClapModelTester: return config, input_ids, attention_mask, input_features def get_config(self): - return ClapConfig.from_text_audio_configs( - self.text_model_tester.get_config(), self.audio_model_tester.get_config(), projection_dim=64 + return ClapConfig( + text_config=self.text_model_tester.get_config().to_dict(), + audio_config=self.audio_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, input_features): diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py index 90506e26db..92de6d4583 100644 --- a/tests/models/clip/test_modeling_clip.py +++ b/tests/models/clip/test_modeling_clip.py @@ -502,8 +502,10 @@ class CLIPModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return CLIPConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return CLIPConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): diff --git a/tests/models/clipseg/test_modeling_clipseg.py b/tests/models/clipseg/test_modeling_clipseg.py index 0a0bc5a758..dcbaeeb661 100644 --- a/tests/models/clipseg/test_modeling_clipseg.py +++ b/tests/models/clipseg/test_modeling_clipseg.py @@ -374,9 +374,9 @@ class CLIPSegModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return CLIPSegConfig.from_text_vision_configs( - self.text_model_tester.get_config(), - self.vision_model_tester.get_config(), + return CLIPSegConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), projection_dim=64, reduce_dim=32, extract_layers=self.extract_layers, diff --git a/tests/models/efficientloftr/test_modeling_efficientloftr.py b/tests/models/efficientloftr/test_modeling_efficientloftr.py index 1efb2e1620..f2aceb15e4 100644 --- a/tests/models/efficientloftr/test_modeling_efficientloftr.py +++ b/tests/models/efficientloftr/test_modeling_efficientloftr.py @@ -216,7 +216,7 @@ class EfficientLoFTRModelTest(ModelTesterMixin, unittest.TestCase): self.assertListEqual( list(hidden_states[0].shape[-2:]), - [self.model_tester.image_height // 2, self.model_tester.image_width // 2], + [self.model_tester.image_height, self.model_tester.image_width], ) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/groupvit/test_modeling_groupvit.py b/tests/models/groupvit/test_modeling_groupvit.py index 3e438a13b3..73af5e4b4b 100644 --- a/tests/models/groupvit/test_modeling_groupvit.py +++ b/tests/models/groupvit/test_modeling_groupvit.py @@ -497,8 +497,10 @@ class GroupViTModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return GroupViTConfig.from_text_vision_configs( - self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 + return GroupViTConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): diff --git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index ff297836f3..bbbe6824be 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -375,7 +375,11 @@ class Owlv2ModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return Owlv2Config.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) + return Owlv2Config( + text_config=self.text_config, + vision_config=self.vision_config, + projection_dim=64, + ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): model = Owlv2Model(config).to(torch_device).eval() @@ -589,7 +593,11 @@ class Owlv2ForObjectDetectionTester: return config, pixel_values, input_ids, attention_mask def get_config(self): - return Owlv2Config.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) + return Owlv2Config( + text_config=self.text_config, + vision_config=self.vision_config, + projection_dim=64, + ) def create_and_check_model(self, config, pixel_values, input_ids, attention_mask): model = Owlv2ForObjectDetection(config).to(torch_device).eval() diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index c1d9df4ea4..d150ae7899 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -371,7 +371,11 @@ class OwlViTModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return OwlViTConfig.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) + return OwlViTConfig( + text_config=self.text_config, + vision_config=self.vision_config, + projection_dim=64, + ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): model = OwlViTModel(config).to(torch_device).eval() @@ -583,7 +587,11 @@ class OwlViTForObjectDetectionTester: return config, pixel_values, input_ids, attention_mask def get_config(self): - return OwlViTConfig.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) + return OwlViTConfig( + text_config=self.text_config, + vision_config=self.vision_config, + projection_dim=64, + ) def create_and_check_model(self, config, pixel_values, input_ids, attention_mask): model = OwlViTForObjectDetection(config).to(torch_device).eval() diff --git a/tests/models/pix2struct/test_modeling_pix2struct.py b/tests/models/pix2struct/test_modeling_pix2struct.py index 2b67ec2397..d6a0fa5f08 100644 --- a/tests/models/pix2struct/test_modeling_pix2struct.py +++ b/tests/models/pix2struct/test_modeling_pix2struct.py @@ -383,7 +383,11 @@ class Pix2StructModelTester: return config, input_ids, attention_mask, flattened_patches def get_config(self, text_config, vision_config): - return Pix2StructConfig.from_text_vision_configs(text_config, vision_config, projection_dim=64) + return Pix2StructConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), + projection_dim=64, + ) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() diff --git a/tests/models/siglip/test_modeling_siglip.py b/tests/models/siglip/test_modeling_siglip.py index 4bff15040e..b5c14280bb 100644 --- a/tests/models/siglip/test_modeling_siglip.py +++ b/tests/models/siglip/test_modeling_siglip.py @@ -428,9 +428,9 @@ class SiglipModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return SiglipConfig.from_text_vision_configs( - self.text_model_tester.get_config(), - self.vision_model_tester.get_config(), + return SiglipConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), ) def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): diff --git a/tests/models/siglip2/test_modeling_siglip2.py b/tests/models/siglip2/test_modeling_siglip2.py index f6825308ff..40ca27f1bd 100644 --- a/tests/models/siglip2/test_modeling_siglip2.py +++ b/tests/models/siglip2/test_modeling_siglip2.py @@ -514,9 +514,9 @@ class Siglip2ModelTester: return config, input_ids, attention_mask, pixel_values, pixel_attention_mask, spatial_shapes def get_config(self): - return Siglip2Config.from_text_vision_configs( - self.text_model_tester.get_config(), - self.vision_model_tester.get_config(), + return Siglip2Config( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), ) def create_and_check_model( diff --git a/tests/models/x_clip/test_modeling_x_clip.py b/tests/models/x_clip/test_modeling_x_clip.py index 8c5134fc6d..5c125f0797 100644 --- a/tests/models/x_clip/test_modeling_x_clip.py +++ b/tests/models/x_clip/test_modeling_x_clip.py @@ -493,9 +493,9 @@ class XCLIPModelTester: return config, input_ids, attention_mask, pixel_values def get_config(self): - return XCLIPConfig.from_text_vision_configs( - self.text_model_tester.get_config(), - self.vision_model_tester.get_config(), + return XCLIPConfig( + text_config=self.text_model_tester.get_config().to_dict(), + vision_config=self.vision_model_tester.get_config().to_dict(), projection_dim=self.projection_dim, )