[configuration] remove redundant classmethod (#38812)

* remove redundant classmethod

* warning message, add space between words

* fix tests

* fix copies
This commit is contained in:
Raushan Turganbay
2025-07-28 12:38:48 +02:00
committed by GitHub
parent 02ea23cbde
commit b56d721397
31 changed files with 117 additions and 210 deletions

View File

@@ -1199,6 +1199,42 @@ class PretrainedConfig(PushToHubMixin):
config_to_return = self config_to_return = self
return config_to_return return config_to_return
@classmethod
def from_text_vision_configs(cls, text_config, vision_config, **kwargs):
    r"""
    Instantiate a model config (or a derived class) from text model configuration and vision model
    configuration.

    This helper is deprecated and emits a `FutureWarning` when called; instantiate the config class directly
    instead.

    Args:
        text_config: Text model configuration object. Its `to_dict()` result is passed as `text_config`.
        vision_config: Vision model configuration object. Its `to_dict()` result is passed as `vision_config`.
        **kwargs: Extra keyword arguments forwarded unchanged to `cls`.

    Returns:
        [`PretrainedConfig`]: An instance of a configuration object
    """
    warnings.warn(
        "The `from_text_vision_configs` method is deprecated and will be removed in v4.60 of Transformers. Please instantiate "
        "the config class directly with `MyConfig(text_config=text_config, vision_config=vision_config, **kwargs)` instead.",
        FutureWarning,
        # Attribute the warning to the calling code rather than to this helper.
        stacklevel=2,
    )
    return cls(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs)
@classmethod
def from_text_audio_configs(cls, text_config, audio_config, **kwargs):
    r"""
    Instantiate a model config (or a derived class) from text model configuration and audio model
    configuration.

    This helper is deprecated and emits a `FutureWarning` when called; instantiate the config class directly
    instead.

    Args:
        text_config: Text model configuration object. Its `to_dict()` result is passed as `text_config`.
        audio_config: Audio model configuration object. Its `to_dict()` result is passed as `audio_config`.
        **kwargs: Extra keyword arguments forwarded unchanged to `cls`.

    Returns:
        [`PretrainedConfig`]: An instance of a configuration object
    """
    warnings.warn(
        "The `from_text_audio_configs` method is deprecated and will be removed in v4.60 of Transformers. Please instantiate "
        "the config class directly with `MyConfig(text_config=text_config, audio_config=audio_config, **kwargs)` instead.",
        FutureWarning,
        # Attribute the warning to the calling code rather than to this helper.
        stacklevel=2,
    )
    return cls(text_config=text_config.to_dict(), audio_config=audio_config.to_dict(), **kwargs)
def get_configuration_file(configuration_files: list[str]) -> str: def get_configuration_file(configuration_files: list[str]) -> str:
""" """

View File

@@ -280,17 +280,5 @@ class Aimv2Config(PretrainedConfig):
self.logit_scale_init_value = logit_scale_init_value self.logit_scale_init_value = logit_scale_init_value
self.max_logit_scale = 100.0 self.max_logit_scale = 100.0
@classmethod
def from_text_vision_configs(cls, text_config: Aimv2TextConfig, vision_config: Aimv2VisionConfig, **kwargs):
    r"""
    Build a [`Aimv2Config`] (or a derived class) out of an aimv2 text model configuration and an aimv2 vision
    model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`Aimv2Config`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["Aimv2Config", "Aimv2VisionConfig", "Aimv2TextConfig"] __all__ = ["Aimv2Config", "Aimv2VisionConfig", "Aimv2TextConfig"]

View File

@@ -327,17 +327,5 @@ class AlignConfig(PretrainedConfig):
self.temperature_init_value = temperature_init_value self.temperature_init_value = temperature_init_value
self.initializer_range = initializer_range self.initializer_range = initializer_range
@classmethod
def from_text_vision_configs(cls, text_config: AlignTextConfig, vision_config: AlignVisionConfig, **kwargs):
    r"""
    Build a [`AlignConfig`] (or a derived class) out of an align text model configuration and an align vision
    model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`AlignConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["AlignTextConfig", "AlignVisionConfig", "AlignConfig"] __all__ = ["AlignTextConfig", "AlignVisionConfig", "AlignConfig"]

View File

@@ -368,17 +368,5 @@ class AltCLIPConfig(PretrainedConfig):
self.logit_scale_init_value = logit_scale_init_value self.logit_scale_init_value = logit_scale_init_value
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_text_vision_configs(cls, text_config: AltCLIPTextConfig, vision_config: AltCLIPVisionConfig, **kwargs):
    r"""
    Build a [`AltCLIPConfig`] (or a derived class) out of an altclip text model configuration and an altclip
    vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`AltCLIPConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["AltCLIPTextConfig", "AltCLIPVisionConfig", "AltCLIPConfig"] __all__ = ["AltCLIPTextConfig", "AltCLIPVisionConfig", "AltCLIPConfig"]

View File

@@ -313,17 +313,5 @@ class BlipConfig(PretrainedConfig):
self.image_text_hidden_size = image_text_hidden_size self.image_text_hidden_size = image_text_hidden_size
self.label_smoothing = label_smoothing self.label_smoothing = label_smoothing
@classmethod
def from_text_vision_configs(cls, text_config: BlipTextConfig, vision_config: BlipVisionConfig, **kwargs):
    r"""
    Build a [`BlipConfig`] (or a derived class) out of a blip text model configuration and a blip vision model
    configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`BlipConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["BlipConfig", "BlipTextConfig", "BlipVisionConfig"] __all__ = ["BlipConfig", "BlipTextConfig", "BlipVisionConfig"]

View File

@@ -304,16 +304,5 @@ class BridgeTowerConfig(PretrainedConfig):
self.text_config = BridgeTowerTextConfig(**text_config) self.text_config = BridgeTowerTextConfig(**text_config)
self.vision_config = BridgeTowerVisionConfig(**vision_config) self.vision_config = BridgeTowerVisionConfig(**vision_config)
@classmethod
def from_text_vision_configs(
    cls, text_config: BridgeTowerTextConfig, vision_config: BridgeTowerVisionConfig, **kwargs
):
    r"""
    Build a [`BridgeTowerConfig`] (or a derived class) out of a BridgeTower text model configuration and a
    BridgeTower vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`BridgeTowerConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["BridgeTowerConfig", "BridgeTowerTextConfig", "BridgeTowerVisionConfig"] __all__ = ["BridgeTowerConfig", "BridgeTowerTextConfig", "BridgeTowerVisionConfig"]

View File

@@ -373,18 +373,6 @@ class ChineseCLIPConfig(PretrainedConfig):
self.initializer_factor = 1.0 self.initializer_factor = 1.0
self.initializer_range = 0.02 self.initializer_range = 0.02
@classmethod
def from_text_vision_configs(
    cls, text_config: ChineseCLIPTextConfig, vision_config: ChineseCLIPVisionConfig, **kwargs
):
    r"""
    Build a [`ChineseCLIPConfig`] (or a derived class) out of a Chinese-CLIP text model configuration and a
    Chinese-CLIP vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`ChineseCLIPConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
class ChineseCLIPOnnxConfig(OnnxConfig): class ChineseCLIPOnnxConfig(OnnxConfig):
@property @property

View File

@@ -378,17 +378,5 @@ class ClapConfig(PretrainedConfig):
self.initializer_factor = initializer_factor self.initializer_factor = initializer_factor
self.num_hidden_layers = self.text_config.num_hidden_layers + len(self.audio_config.depths) self.num_hidden_layers = self.text_config.num_hidden_layers + len(self.audio_config.depths)
@classmethod
def from_text_audio_configs(cls, text_config: ClapTextConfig, audio_config: ClapAudioConfig, **kwargs):
    r"""
    Build a [`ClapConfig`] (or a derived class) out of a clap text model configuration and a clap audio model
    configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`ClapConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    audio_dict = audio_config.to_dict()
    return cls(text_config=text_dict, audio_config=audio_dict, **kwargs)
__all__ = ["ClapAudioConfig", "ClapConfig", "ClapTextConfig"] __all__ = ["ClapAudioConfig", "ClapConfig", "ClapTextConfig"]

View File

@@ -361,18 +361,6 @@ class CLIPConfig(PretrainedConfig):
self.logit_scale_init_value = logit_scale_init_value self.logit_scale_init_value = logit_scale_init_value
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_text_vision_configs(cls, text_config: CLIPTextConfig, vision_config: CLIPVisionConfig, **kwargs):
    r"""
    Build a [`CLIPConfig`] (or a derived class) out of a clip text model configuration and a clip vision model
    configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`CLIPConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
class CLIPOnnxConfig(OnnxConfig): class CLIPOnnxConfig(OnnxConfig):
@property @property

View File

@@ -380,17 +380,5 @@ class CLIPSegConfig(PretrainedConfig):
self.initializer_factor = 1.0 self.initializer_factor = 1.0
self.use_complex_transposed_convolution = use_complex_transposed_convolution self.use_complex_transposed_convolution = use_complex_transposed_convolution
@classmethod
def from_text_vision_configs(cls, text_config: CLIPSegTextConfig, vision_config: CLIPSegVisionConfig, **kwargs):
    r"""
    Build a [`CLIPSegConfig`] (or a derived class) out of a clipseg text model configuration and a clipseg
    vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`CLIPSegConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["CLIPSegConfig", "CLIPSegTextConfig", "CLIPSegVisionConfig"] __all__ = ["CLIPSegConfig", "CLIPSegTextConfig", "CLIPSegVisionConfig"]

View File

@@ -357,18 +357,6 @@ class GroupViTConfig(PretrainedConfig):
self.initializer_factor = 1.0 self.initializer_factor = 1.0
self.output_segmentation = False self.output_segmentation = False
@classmethod
def from_text_vision_configs(cls, text_config: GroupViTTextConfig, vision_config: GroupViTVisionConfig, **kwargs):
    r"""
    Build a [`GroupViTConfig`] (or a derived class) out of a groupvit text model configuration and a groupvit
    vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`GroupViTConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
class GroupViTOnnxConfig(OnnxConfig): class GroupViTOnnxConfig(OnnxConfig):
@property @property

View File

@@ -332,19 +332,5 @@ class Pix2StructConfig(PretrainedConfig):
self.is_vqa = is_vqa self.is_vqa = is_vqa
@classmethod
def from_text_vision_configs(
    cls, text_config: Pix2StructTextConfig, vision_config: Pix2StructVisionConfig, **kwargs
):
    r"""
    Build a [`Pix2StructConfig`] (or a derived class) out of a pix2struct text model configuration and a
    pix2struct vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`Pix2StructConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["Pix2StructConfig", "Pix2StructTextConfig", "Pix2StructVisionConfig"] __all__ = ["Pix2StructConfig", "Pix2StructTextConfig", "Pix2StructVisionConfig"]

View File

@@ -253,17 +253,5 @@ class SiglipConfig(PretrainedConfig):
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_text_vision_configs(cls, text_config: SiglipTextConfig, vision_config: SiglipVisionConfig, **kwargs):
    r"""
    Build a [`SiglipConfig`] (or a derived class) out of a siglip text model configuration and a siglip vision
    model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`SiglipConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["SiglipConfig", "SiglipTextConfig", "SiglipVisionConfig"] __all__ = ["SiglipConfig", "SiglipTextConfig", "SiglipVisionConfig"]

View File

@@ -261,17 +261,5 @@ class Siglip2Config(PretrainedConfig):
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_text_vision_configs(cls, text_config: Siglip2TextConfig, vision_config: Siglip2VisionConfig, **kwargs):
    r"""
    Build a [`Siglip2Config`] (or a derived class) out of a siglip2 text model configuration and a siglip2
    vision model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`Siglip2Config`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["Siglip2Config", "Siglip2TextConfig", "Siglip2VisionConfig"] __all__ = ["Siglip2Config", "Siglip2TextConfig", "Siglip2VisionConfig"]

View File

@@ -365,17 +365,5 @@ class XCLIPConfig(PretrainedConfig):
self.logit_scale_init_value = logit_scale_init_value self.logit_scale_init_value = logit_scale_init_value
self.initializer_factor = 1.0 self.initializer_factor = 1.0
@classmethod
def from_text_vision_configs(cls, text_config: XCLIPTextConfig, vision_config: XCLIPVisionConfig, **kwargs):
    r"""
    Build a [`XCLIPConfig`] (or a derived class) out of an xclip text model configuration and an xclip vision
    model configuration.

    Both sub-configurations are serialized with `to_dict()` before being handed to the constructor; any extra
    keyword arguments are forwarded unchanged.

    Returns:
        [`XCLIPConfig`]: An instance of a configuration object
    """
    text_dict = text_config.to_dict()
    vision_dict = vision_config.to_dict()
    return cls(text_config=text_dict, vision_config=vision_dict, **kwargs)
__all__ = ["XCLIPConfig", "XCLIPTextConfig", "XCLIPVisionConfig"] __all__ = ["XCLIPConfig", "XCLIPTextConfig", "XCLIPVisionConfig"]

View File

@@ -408,8 +408,10 @@ class AlignModelTester:
return config, input_ids, token_type_ids, input_mask, pixel_values return config, input_ids, token_type_ids, input_mask, pixel_values
def get_config(self): def get_config(self):
return AlignConfig.from_text_vision_configs( return AlignConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values):

View File

@@ -376,8 +376,10 @@ class AltCLIPModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return AltCLIPConfig.from_text_vision_configs( return AltCLIPConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):

View File

@@ -381,8 +381,10 @@ class BlipModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return BlipConfig.from_text_vision_configs( return BlipConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
@@ -664,8 +666,10 @@ class BlipTextRetrievalModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return BlipConfig.from_text_vision_configs( return BlipConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
@@ -713,8 +717,10 @@ class BlipTextImageModelsModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return BlipConfig.from_text_vision_configs( return BlipConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
@@ -761,8 +767,10 @@ class BlipVQAModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return BlipConfig.from_text_vision_configs( return BlipConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):

View File

@@ -203,9 +203,9 @@ class BridgeTowerModelTester:
return (config, input_ids, attention_mask, pixel_values, pixel_mask) return (config, input_ids, attention_mask, pixel_values, pixel_mask)
def get_config(self): def get_config(self):
return BridgeTowerConfig.from_text_vision_configs( return BridgeTowerConfig(
text_config=self.text_model_tester.get_config(), text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config(), vision_config=self.vision_model_tester.get_config().to_dict(),
share_cross_modal_transformer_layers=self.share_cross_modal_transformer_layers, share_cross_modal_transformer_layers=self.share_cross_modal_transformer_layers,
share_link_tower_layers=self.share_link_tower_layers, share_link_tower_layers=self.share_link_tower_layers,
link_tower_type=self.link_tower_type, link_tower_type=self.link_tower_type,

View File

@@ -515,8 +515,10 @@ class ChineseCLIPModelTester:
return config, input_ids, token_type_ids, attention_mask, pixel_values return config, input_ids, token_type_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return ChineseCLIPConfig.from_text_vision_configs( return ChineseCLIPConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, token_type_ids, attention_mask, pixel_values):

View File

@@ -459,8 +459,10 @@ class ClapModelTester:
return config, input_ids, attention_mask, input_features return config, input_ids, attention_mask, input_features
def get_config(self): def get_config(self):
return ClapConfig.from_text_audio_configs( return ClapConfig(
self.text_model_tester.get_config(), self.audio_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
audio_config=self.audio_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, input_features): def create_and_check_model(self, config, input_ids, attention_mask, input_features):

View File

@@ -502,8 +502,10 @@ class CLIPModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return CLIPConfig.from_text_vision_configs( return CLIPConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):

View File

@@ -374,9 +374,9 @@ class CLIPSegModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return CLIPSegConfig.from_text_vision_configs( return CLIPSegConfig(
self.text_model_tester.get_config(), text_config=self.text_model_tester.get_config().to_dict(),
self.vision_model_tester.get_config(), vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64, projection_dim=64,
reduce_dim=32, reduce_dim=32,
extract_layers=self.extract_layers, extract_layers=self.extract_layers,

View File

@@ -216,7 +216,7 @@ class EfficientLoFTRModelTest(ModelTesterMixin, unittest.TestCase):
self.assertListEqual( self.assertListEqual(
list(hidden_states[0].shape[-2:]), list(hidden_states[0].shape[-2:]),
[self.model_tester.image_height // 2, self.model_tester.image_width // 2], [self.model_tester.image_height, self.model_tester.image_width],
) )
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

View File

@@ -497,8 +497,10 @@ class GroupViTModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return GroupViTConfig.from_text_vision_configs( return GroupViTConfig(
self.text_model_tester.get_config(), self.vision_model_tester.get_config(), projection_dim=64 text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):

View File

@@ -375,7 +375,11 @@ class Owlv2ModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return Owlv2Config.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) return Owlv2Config(
text_config=self.text_config,
vision_config=self.vision_config,
projection_dim=64,
)
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
model = Owlv2Model(config).to(torch_device).eval() model = Owlv2Model(config).to(torch_device).eval()
@@ -589,7 +593,11 @@ class Owlv2ForObjectDetectionTester:
return config, pixel_values, input_ids, attention_mask return config, pixel_values, input_ids, attention_mask
def get_config(self): def get_config(self):
return Owlv2Config.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) return Owlv2Config(
text_config=self.text_config,
vision_config=self.vision_config,
projection_dim=64,
)
def create_and_check_model(self, config, pixel_values, input_ids, attention_mask): def create_and_check_model(self, config, pixel_values, input_ids, attention_mask):
model = Owlv2ForObjectDetection(config).to(torch_device).eval() model = Owlv2ForObjectDetection(config).to(torch_device).eval()

View File

@@ -371,7 +371,11 @@ class OwlViTModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return OwlViTConfig.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) return OwlViTConfig(
text_config=self.text_config,
vision_config=self.vision_config,
projection_dim=64,
)
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):
model = OwlViTModel(config).to(torch_device).eval() model = OwlViTModel(config).to(torch_device).eval()
@@ -583,7 +587,11 @@ class OwlViTForObjectDetectionTester:
return config, pixel_values, input_ids, attention_mask return config, pixel_values, input_ids, attention_mask
def get_config(self): def get_config(self):
return OwlViTConfig.from_text_vision_configs(self.text_config, self.vision_config, projection_dim=64) return OwlViTConfig(
text_config=self.text_config,
vision_config=self.vision_config,
projection_dim=64,
)
def create_and_check_model(self, config, pixel_values, input_ids, attention_mask): def create_and_check_model(self, config, pixel_values, input_ids, attention_mask):
model = OwlViTForObjectDetection(config).to(torch_device).eval() model = OwlViTForObjectDetection(config).to(torch_device).eval()

View File

@@ -383,7 +383,11 @@ class Pix2StructModelTester:
return config, input_ids, attention_mask, flattened_patches return config, input_ids, attention_mask, flattened_patches
def get_config(self, text_config, vision_config): def get_config(self, text_config, vision_config):
return Pix2StructConfig.from_text_vision_configs(text_config, vision_config, projection_dim=64) return Pix2StructConfig(
text_config=self.text_model_tester.get_config().to_dict(),
vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=64,
)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()

View File

@@ -428,9 +428,9 @@ class SiglipModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return SiglipConfig.from_text_vision_configs( return SiglipConfig(
self.text_model_tester.get_config(), text_config=self.text_model_tester.get_config().to_dict(),
self.vision_model_tester.get_config(), vision_config=self.vision_model_tester.get_config().to_dict(),
) )
def create_and_check_model(self, config, input_ids, attention_mask, pixel_values): def create_and_check_model(self, config, input_ids, attention_mask, pixel_values):

View File

@@ -514,9 +514,9 @@ class Siglip2ModelTester:
return config, input_ids, attention_mask, pixel_values, pixel_attention_mask, spatial_shapes return config, input_ids, attention_mask, pixel_values, pixel_attention_mask, spatial_shapes
def get_config(self): def get_config(self):
return Siglip2Config.from_text_vision_configs( return Siglip2Config(
self.text_model_tester.get_config(), text_config=self.text_model_tester.get_config().to_dict(),
self.vision_model_tester.get_config(), vision_config=self.vision_model_tester.get_config().to_dict(),
) )
def create_and_check_model( def create_and_check_model(

View File

@@ -493,9 +493,9 @@ class XCLIPModelTester:
return config, input_ids, attention_mask, pixel_values return config, input_ids, attention_mask, pixel_values
def get_config(self): def get_config(self):
return XCLIPConfig.from_text_vision_configs( return XCLIPConfig(
self.text_model_tester.get_config(), text_config=self.text_model_tester.get_config().to_dict(),
self.vision_model_tester.get_config(), vision_config=self.vision_model_tester.get_config().to_dict(),
projection_dim=self.projection_dim, projection_dim=self.projection_dim,
) )