From 16d6faef9a3d7cb6c002d3af4f0dc566195bff93 Mon Sep 17 00:00:00 2001 From: Pavel Iakubovskii Date: Mon, 4 Aug 2025 11:35:09 +0100 Subject: [PATCH] [core] Fix attn_implementation setter with missing `sub_configs` (#39855) * fix * add sub_configs * remove case for attention setter * fix None * Add test * Fix sub-configs * fix tests_config * fix consistency * fix fsmt * fix --- .../configuration_conditional_detr.py | 8 ++++++++ .../models/d_fine/configuration_d_fine.py | 8 ++++++++ .../models/d_fine/modular_d_fine.py | 8 ++++++++ .../models/dab_detr/configuration_dab_detr.py | 8 ++++++++ .../configuration_deformable_detr.py | 8 ++++++++ .../deprecated/deta/configuration_deta.py | 8 ++++++++ .../vit_hybrid/configuration_vit_hybrid.py | 8 ++++++++ .../configuration_depth_anything.py | 8 ++++++++ .../models/detr/configuration_detr.py | 8 ++++++++ .../models/fsmt/configuration_fsmt.py | 5 +++-- .../configuration_grounding_dino.py | 11 +++++++++++ .../mask2former/configuration_mask2former.py | 8 ++++++++ .../maskformer/configuration_maskformer.py | 9 +++++++++ .../configuration_mm_grounding_dino.py | 11 +++++++++++ .../omdet_turbo/configuration_omdet_turbo.py | 11 +++++++++++ .../oneformer/configuration_oneformer.py | 8 ++++++++ .../pix2struct/configuration_pix2struct.py | 1 + .../configuration_prompt_depth_anything.py | 8 ++++++++ .../models/rt_detr/configuration_rt_detr.py | 8 ++++++++ .../rt_detr_v2/configuration_rt_detr_v2.py | 8 ++++++++ .../models/rt_detr_v2/modular_rt_detr_v2.py | 8 ++++++++ .../superglue/configuration_superglue.py | 4 ++++ .../configuration_table_transformer.py | 8 ++++++++ .../models/tvp/configuration_tvp.py | 8 ++++++++ .../models/upernet/configuration_upernet.py | 8 ++++++++ .../models/vitmatte/configuration_vitmatte.py | 8 ++++++++ .../models/vitpose/configuration_vitpose.py | 8 ++++++++ .../models/zoedepth/configuration_zoedepth.py | 8 ++++++++ tests/test_configuration_common.py | 1 + tests/test_modeling_common.py | 19 +++++++++++++++++++ 30 files changed, 238 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index b85ef10556..26a8ed0514 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -253,6 +253,14 @@ class ConditionalDetrConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + class ConditionalDetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/d_fine/configuration_d_fine.py b/src/transformers/models/d_fine/configuration_d_fine.py index 3f408cb785..7484d9a347 100644 --- a/src/transformers/models/d_fine/configuration_d_fine.py +++ b/src/transformers/models/d_fine/configuration_d_fine.py @@ -404,6 +404,14 @@ class DFineConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_configs(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`DFineConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/d_fine/modular_d_fine.py b/src/transformers/models/d_fine/modular_d_fine.py index 4b5a9a2b33..883f07a997 100644 --- a/src/transformers/models/d_fine/modular_d_fine.py +++ b/src/transformers/models/d_fine/modular_d_fine.py @@ -423,6 +423,14 @@ class DFineConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_configs(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`DFineConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/dab_detr/configuration_dab_detr.py b/src/transformers/models/dab_detr/configuration_dab_detr.py index 398e6f2659..e53d7783a6 100644 --- a/src/transformers/models/dab_detr/configuration_dab_detr.py +++ b/src/transformers/models/dab_detr/configuration_dab_detr.py @@ -256,5 +256,13 @@ class DabDetrConfig(PretrainedConfig): self.initializer_bias_prior_prob = initializer_bias_prior_prob super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["DabDetrConfig"] diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index 7e6b94d7d3..b85a739990 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -278,5 +278,13 @@ class DeformableDetrConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["DeformableDetrConfig"] diff --git a/src/transformers/models/deprecated/deta/configuration_deta.py b/src/transformers/models/deprecated/deta/configuration_deta.py index 90c779a8ee..2109902ac0 100644 --- a/src/transformers/models/deprecated/deta/configuration_deta.py +++ b/src/transformers/models/deprecated/deta/configuration_deta.py @@ -266,5 +266,13 @@ class DetaConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["DetaConfig"] diff --git a/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py index c66367cc0a..e96c6c4a1b 100644 --- a/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py @@ -168,5 +168,13 @@ class ViTHybridConfig(PretrainedConfig): self.num_channels = num_channels self.qkv_bias = qkv_bias + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["ViTHybridConfig"] diff --git a/src/transformers/models/depth_anything/configuration_depth_anything.py b/src/transformers/models/depth_anything/configuration_depth_anything.py index b28508cbe9..65884fe67c 100644 --- a/src/transformers/models/depth_anything/configuration_depth_anything.py +++ b/src/transformers/models/depth_anything/configuration_depth_anything.py @@ -151,6 +151,14 @@ class DepthAnythingConfig(PretrainedConfig): self.depth_estimation_type = depth_estimation_type self.max_depth = max_depth if max_depth else 1 + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + def to_dict(self): """ Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns: diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index 976b937312..c954038292 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -252,6 +252,14 @@ class DetrConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`DetrConfig`] (or a derived class) from a pre-trained backbone model configuration. diff --git a/src/transformers/models/fsmt/configuration_fsmt.py b/src/transformers/models/fsmt/configuration_fsmt.py index f120d3c436..390e6f1ab5 100644 --- a/src/transformers/models/fsmt/configuration_fsmt.py +++ b/src/transformers/models/fsmt/configuration_fsmt.py @@ -28,8 +28,8 @@ class DecoderConfig(PretrainedConfig): model_type = "fsmt_decoder" - def __init__(self, vocab_size=0, bos_token_id=0, is_encoder_decoder=True): - super().__init__() + def __init__(self, vocab_size=0, bos_token_id=0, is_encoder_decoder=True, **kwargs): + super().__init__(**kwargs) self.vocab_size = vocab_size self.bos_token_id = bos_token_id self.is_encoder_decoder = is_encoder_decoder @@ -134,6 +134,7 @@ class FSMTConfig(PretrainedConfig): model_type = "fsmt" attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} + sub_configs = {"decoder": DecoderConfig} # update the defaults from config file def __init__( diff --git a/src/transformers/models/grounding_dino/configuration_grounding_dino.py b/src/transformers/models/grounding_dino/configuration_grounding_dino.py index a45848b4c4..838a897f70 100644 --- a/src/transformers/models/grounding_dino/configuration_grounding_dino.py +++ b/src/transformers/models/grounding_dino/configuration_grounding_dino.py @@ -294,5 +294,16 @@ class GroundingDinoConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + sub_configs = {} + backbone_config = getattr(self, "backbone_config", None) + text_config = getattr(self, "text_config", None) + if isinstance(backbone_config, PretrainedConfig): + sub_configs["backbone_config"] = type(backbone_config) + if isinstance(text_config, PretrainedConfig): + sub_configs["text_config"] = type(self.text_config) + return sub_configs + __all__ = ["GroundingDinoConfig"] diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 5f9ab6f5e6..9ae93892ae 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -236,6 +236,14 @@ class Mask2FormerConfig(PretrainedConfig): super().__init__(**kwargs) + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`Mask2FormerConfig`] (or a derived class) from a pre-trained backbone model configuration. diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 60acff610f..d988acb45e 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -200,6 +200,15 @@ class MaskFormerConfig(PretrainedConfig): self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) + @property + def sub_configs(self): + sub_configs = {} + if self.backbone_config is not None and self.backbone_config != {}: + sub_configs["backbone_config"] = type(self.backbone_config) + if self.decoder_config is not None and self.decoder_config != {}: + sub_configs["decoder_config"] = type(self.decoder_config) + return sub_configs + @classmethod def from_backbone_and_decoder_configs( cls, backbone_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs diff --git a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py index 42e3af33a1..193faecf39 100644 --- a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +++ b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py @@ -288,5 +288,16 @@ class MMGroundingDinoConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + sub_configs = {} + backbone_config = getattr(self, "backbone_config", None) + text_config = getattr(self, "text_config", None) + if isinstance(backbone_config, PretrainedConfig): + sub_configs["backbone_config"] = type(backbone_config) + if isinstance(text_config, PretrainedConfig): + sub_configs["text_config"] = type(self.text_config) + return sub_configs + __all__ = ["MMGroundingDinoConfig"] diff --git a/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py b/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py index 477880de3c..e11cc563db 100644 --- a/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py @@ -289,5 +289,16 @@ class OmDetTurboConfig(PretrainedConfig): super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) + @property + def sub_configs(self): + sub_configs = {} + backbone_config = getattr(self, "backbone_config", None) + text_config = getattr(self, "text_config", None) + if isinstance(backbone_config, PretrainedConfig): + sub_configs["backbone_config"] = type(backbone_config) + if isinstance(text_config, PretrainedConfig): + sub_configs["text_config"] = type(text_config) + return sub_configs + __all__ = ["OmDetTurboConfig"] diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index 580995d655..1b9229f040 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -273,5 +273,13 @@ class OneFormerConfig(PretrainedConfig): super().__init__(**kwargs) + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["OneFormerConfig"] diff --git a/src/transformers/models/pix2struct/configuration_pix2struct.py b/src/transformers/models/pix2struct/configuration_pix2struct.py index be2110022f..89109350d9 100644 --- a/src/transformers/models/pix2struct/configuration_pix2struct.py +++ b/src/transformers/models/pix2struct/configuration_pix2struct.py @@ -293,6 +293,7 @@ class Pix2StructConfig(PretrainedConfig): ```""" model_type = "pix2struct" + sub_configs = {"text_config": Pix2StructTextConfig, "vision_config": Pix2StructVisionConfig} def __init__( self, diff --git a/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py b/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py index 5405d443fe..a45ae66cf3 100644 --- a/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +++ b/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py @@ -154,6 +154,14 @@ class PromptDepthAnythingConfig(PretrainedConfig): self.depth_estimation_type = depth_estimation_type self.max_depth = max_depth if max_depth else 1 + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + def to_dict(self): """ Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns: diff --git a/src/transformers/models/rt_detr/configuration_rt_detr.py b/src/transformers/models/rt_detr/configuration_rt_detr.py index 1019c34650..994d4a6fd6 100644 --- a/src/transformers/models/rt_detr/configuration_rt_detr.py +++ b/src/transformers/models/rt_detr/configuration_rt_detr.py @@ -343,6 +343,14 @@ class RTDetrConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_configs(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`RTDetrConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py index 602e63fabe..6f4a53483b 100644 --- a/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py @@ -358,6 +358,14 @@ class RTDetrV2Config(PretrainedConfig): self.decoder_offset_scale = decoder_offset_scale self.decoder_method = decoder_method + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_configs(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`RTDetrV2Config`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py index 0bcff45cfa..9c8477c86e 100644 --- a/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py @@ -369,6 +369,14 @@ class RTDetrV2Config(PretrainedConfig): self.decoder_offset_scale = decoder_offset_scale self.decoder_method = decoder_method + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_configs(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`RTDetrV2Config`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/superglue/configuration_superglue.py b/src/transformers/models/superglue/configuration_superglue.py index de6086114f..74bd991f95 100644 --- a/src/transformers/models/superglue/configuration_superglue.py +++ b/src/transformers/models/superglue/configuration_superglue.py @@ -114,5 +114,9 @@ class SuperGlueConfig(PretrainedConfig): super().__init__(**kwargs) + @property + def sub_configs(self): + return {"keypoint_detector_config": type(self.keypoint_detector_config)} + __all__ = ["SuperGlueConfig"] diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 6c091f5ef7..32eed6ce0d 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -253,6 +253,14 @@ class TableTransformerConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + # Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig class TableTransformerOnnxConfig(OnnxConfig): diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 4a48574e11..be7c785084 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -172,6 +172,14 @@ class TvpConfig(PretrainedConfig): self.initializer_range = initializer_range self.attention_probs_dropout_prob = attention_probs_dropout_prob + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + @classmethod def from_backbone_config(cls, backbone_config: PretrainedConfig, **kwargs): """Instantiate a [`TvpConfig`] (or a derived class) from a pre-trained backbone model configuration. diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 17af62c4b2..d116b22fcf 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -136,5 +136,13 @@ class UperNetConfig(PretrainedConfig): self.auxiliary_concat_input = auxiliary_concat_input self.loss_ignore_index = loss_ignore_index + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["UperNetConfig"] diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index ff99c8287e..f63c3e4eb8 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -121,6 +121,14 @@ class VitMatteConfig(PretrainedConfig): self.convstream_hidden_sizes = convstream_hidden_sizes self.fusion_hidden_sizes = fusion_hidden_sizes + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + def to_dict(self): """ Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns: diff --git a/src/transformers/models/vitpose/configuration_vitpose.py b/src/transformers/models/vitpose/configuration_vitpose.py index aba8fec7ae..777e3d3c60 100644 --- a/src/transformers/models/vitpose/configuration_vitpose.py +++ b/src/transformers/models/vitpose/configuration_vitpose.py @@ -122,5 +122,13 @@ class VitPoseConfig(PretrainedConfig): self.scale_factor = scale_factor self.use_simple_decoder = use_simple_decoder + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["VitPoseConfig"] diff --git a/src/transformers/models/zoedepth/configuration_zoedepth.py b/src/transformers/models/zoedepth/configuration_zoedepth.py index cab3b713e1..ac89f815f8 100644 --- a/src/transformers/models/zoedepth/configuration_zoedepth.py +++ b/src/transformers/models/zoedepth/configuration_zoedepth.py @@ -233,5 +233,13 @@ class ZoeDepthConfig(PretrainedConfig): self.patch_transformer_intermediate_size = patch_transformer_intermediate_size self.patch_transformer_num_attention_heads = patch_transformer_num_attention_heads + @property + def sub_configs(self): + return ( + {"backbone_config": type(self.backbone_config)} + if getattr(self, "backbone_config", None) is not None + else {} + ) + __all__ = ["ZOEDEPTH_PRETRAINED_CONFIG_ARCHIVE_MAP", "ZoeDepthConfig"] diff --git a/tests/test_configuration_common.py b/tests/test_configuration_common.py index 4d4ce3a3f1..c47e23cc3a 100644 --- a/tests/test_configuration_common.py +++ b/tests/test_configuration_common.py @@ -141,6 +141,7 @@ class ConfigTester: # Verify that loading with subconfig class results in same dict as if we loaded with general composite config class sub_config_loaded_dict = sub_config_loaded.to_dict() sub_config_loaded_dict.pop("transformers_version", None) + general_config_dict[sub_config_key].pop("transformers_version", None) self.parent.assertEqual(sub_config_loaded_dict, general_config_dict[sub_config_key]) # Verify that the loaded config type is same as in the general config diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index e60537e302..8081317505 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -4812,6 +4812,25 @@ class ModelTesterMixin: f"All parameters should be on meta device, but found {unique_devices}.", ) + def test_config_attn_implementation_setter(self): + config, _ = self.model_tester.prepare_config_and_inputs_for_common() + + def check_attn_implementation_setter(config: PretrainedConfig, attn_implementation: str): + if not config._attn_implementation == attn_implementation: + raise ValueError( + f"Unexpected attn_implementation for config {config.__class__.__name__}: " + f"{config._attn_implementation} != {attn_implementation}" + ) + for attribute_value in config.__dict__.values(): + if isinstance(attribute_value, PretrainedConfig): + check_attn_implementation_setter(attribute_value, attn_implementation) + + config._attn_implementation = "eager" + check_attn_implementation_setter(config, "eager") + + config._attn_implementation = "sdpa" + check_attn_implementation_setter(config, "sdpa") + def test_internal_model_config_and_subconfig_are_same(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() subconfig_keys = list(config.sub_configs.keys())