diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index a5cc3d5303..7a6cd43638 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -98,6 +98,9 @@ class ConditionalDetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -168,6 +171,7 @@ class ConditionalDetrConfig(PretrainedConfig): position_embedding_type="sine", backbone="resnet50", use_pretrained_backbone=True, + backbone_kwargs=None, dilation=False, class_cost=2, bbox_cost=5, @@ -191,6 +195,9 @@ class ConditionalDetrConfig(PretrainedConfig): if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if not use_timm_backbone: if backbone_config is None: logger.info("`backbone_config` is `None`. 
Initializing the config with the default `ResNet` backbone.") @@ -224,6 +231,7 @@ class ConditionalDetrConfig(PretrainedConfig): self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.backbone_kwargs = backbone_kwargs self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index e9a4cde2df..eb3b3807ab 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -90,6 +90,9 @@ class DeformableDetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, defaults to `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. 
@@ -177,6 +180,7 @@ class DeformableDetrConfig(PretrainedConfig): position_embedding_type="sine", backbone="resnet50", use_pretrained_backbone=True, + backbone_kwargs=None, dilation=False, num_feature_levels=4, encoder_n_points=4, @@ -207,6 +211,9 @@ class DeformableDetrConfig(PretrainedConfig): if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if not use_timm_backbone: if backbone_config is None: logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.") @@ -238,6 +245,7 @@ class DeformableDetrConfig(PretrainedConfig): self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.backbone_kwargs = backbone_kwargs self.dilation = dilation # deformable attributes self.num_feature_levels = num_feature_levels diff --git a/src/transformers/models/deta/configuration_deta.py b/src/transformers/models/deta/configuration_deta.py index 633d6267ef..378d322361 100644 --- a/src/transformers/models/deta/configuration_deta.py +++ b/src/transformers/models/deta/configuration_deta.py @@ -49,6 +49,9 @@ class DetaConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. num_queries (`int`, *optional*, defaults to 900): Number of object queries, i.e. detection slots. This is the maximal number of objects [`DetaModel`] can detect in a single image. 
In case `two_stage` is set to `True`, we use `two_stage_num_proposals` instead. @@ -150,6 +153,7 @@ class DetaConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, num_queries=900, max_position_embeddings=2048, encoder_layers=6, @@ -204,10 +208,14 @@ class DetaConfig(PretrainedConfig): config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.num_queries = num_queries self.max_position_embeddings = max_position_embeddings self.d_model = d_model diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index acaf0dfe1e..f13c1ef09a 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -98,6 +98,9 @@ class DetrConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. 
@@ -166,6 +169,7 @@ class DetrConfig(PretrainedConfig): position_embedding_type="sine", backbone="resnet50", use_pretrained_backbone=True, + backbone_kwargs=None, dilation=False, class_cost=1, bbox_cost=5, @@ -188,6 +192,9 @@ class DetrConfig(PretrainedConfig): if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if not use_timm_backbone: if backbone_config is None: logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.") @@ -223,6 +230,7 @@ class DetrConfig(PretrainedConfig): self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.backbone_kwargs = backbone_kwargs self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index e6567f719d..97b9e2e9a8 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -120,6 +120,9 @@ class DPTConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. 
Example: @@ -173,6 +176,7 @@ class DPTConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, **kwargs, ): super().__init__(**kwargs) @@ -230,9 +234,13 @@ class DPTConfig(PretrainedConfig): if use_autobackbone and backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.num_hidden_layers = None if use_autobackbone else num_hidden_layers self.num_attention_heads = None if use_autobackbone else num_attention_heads self.intermediate_size = None if use_autobackbone else intermediate_size diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 0d27ba39cb..0b5aa9aa0c 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -56,6 +56,9 @@ class Mask2FormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. feature_size (`int`, *optional*, defaults to 256): The features (channels) of the resulting feature maps. 
mask_feature_size (`int`, *optional*, defaults to 256): @@ -163,9 +166,10 @@ class Mask2FormerConfig(PretrainedConfig): use_auxiliary_loss: bool = True, feature_strides: List[int] = [4, 8, 16, 32], output_auxiliary_logits: bool = None, - backbone=None, - use_pretrained_backbone=False, - use_timm_backbone=False, + backbone: Optional[str] = None, + use_pretrained_backbone: bool = False, + use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, **kwargs, ): if use_pretrained_backbone: @@ -189,6 +193,9 @@ class Mask2FormerConfig(PretrainedConfig): out_features=["stage1", "stage2", "stage3", "stage4"], ) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if isinstance(backbone_config, dict): backbone_model_type = backbone_config.pop("model_type") config_class = CONFIG_MAPPING[backbone_model_type] @@ -233,6 +240,7 @@ class Mask2FormerConfig(PretrainedConfig): self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index e906ceb2b3..758ac4eb20 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -66,6 +66,9 @@ class MaskFormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. 
decoder_config (`Dict`, *optional*): The configuration passed to the transformer decoder model, if unset the base config for `detr-resnet-50` will be used. @@ -126,6 +129,7 @@ class MaskFormerConfig(PretrainedConfig): backbone: Optional[str] = None, use_pretrained_backbone: bool = False, use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, **kwargs, ): if use_pretrained_backbone: @@ -134,6 +138,9 @@ class MaskFormerConfig(PretrainedConfig): if backbone_config is not None and backbone is not None: raise ValueError("You can't specify both `backbone` and `backbone_config`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if backbone_config is None and backbone is None: # fall back to https://huggingface.co/microsoft/swin-base-patch4-window12-384-in22k backbone_config = SwinConfig( @@ -198,6 +205,7 @@ class MaskFormerConfig(PretrainedConfig): self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) @classmethod diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index b88e2c5590..c4c2851947 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -53,6 +53,9 @@ class OneFormerConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. 
ignore_value (`int`, *optional*, defaults to 255): Values to be ignored in GT label while calculating loss. num_queries (`int`, *optional*, defaults to 150): @@ -156,6 +159,7 @@ class OneFormerConfig(PretrainedConfig): backbone: Optional[str] = None, use_pretrained_backbone: bool = False, use_timm_backbone: bool = False, + backbone_kwargs: Optional[Dict] = None, ignore_value: int = 255, num_queries: int = 150, no_object_weight: int = 0.1, @@ -223,10 +227,14 @@ class OneFormerConfig(PretrainedConfig): config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.ignore_value = ignore_value self.num_queries = num_queries self.no_object_weight = no_object_weight diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 5a97ce05b3..12b62ee973 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -98,6 +98,9 @@ class TableTransformerConfig(PretrainedConfig): is `False`, this loads the backbone's config and uses that to initialize the backbone with random weights. use_pretrained_backbone (`bool`, *optional*, `True`): Whether to use pretrained weights for the backbone. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. 
dilation (`bool`, *optional*, defaults to `False`): Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when `use_timm_backbone` = `True`. @@ -167,6 +170,7 @@ class TableTransformerConfig(PretrainedConfig): position_embedding_type="sine", backbone="resnet50", use_pretrained_backbone=True, + backbone_kwargs=None, dilation=False, class_cost=1, bbox_cost=5, @@ -189,6 +193,9 @@ class TableTransformerConfig(PretrainedConfig): if backbone_config is not None and use_timm_backbone: raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.") + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if not use_timm_backbone: if backbone_config is None: logger.info("`backbone_config` is `None`. Initializing the config with the default `ResNet` backbone.") @@ -224,6 +231,7 @@ class TableTransformerConfig(PretrainedConfig): self.position_embedding_type = position_embedding_type self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone + self.backbone_kwargs = backbone_kwargs self.dilation = dilation # Hungarian matcher self.class_cost = class_cost diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index 7e985ab84e..f39a0ab5df 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -52,6 +52,9 @@ class TvpConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. 
distance_loss_weight (`float`, *optional*, defaults to 1.0): The weight of distance loss. duration_loss_weight (`float`, *optional*, defaults to 0.1): @@ -107,6 +110,7 @@ class TvpConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, distance_loss_weight=1.0, duration_loss_weight=0.1, visual_prompter_type="framepad", @@ -144,10 +148,14 @@ class TvpConfig(PretrainedConfig): config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.distance_loss_weight = distance_loss_weight self.duration_loss_weight = duration_loss_weight self.visual_prompter_type = visual_prompter_type diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index 9288bd67b6..609818c80d 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -45,6 +45,9 @@ class UperNetConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 512): The number of hidden units in the convolutional layers. 
initializer_range (`float`, *optional*, defaults to 0.02): @@ -87,6 +90,7 @@ class UperNetConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size=512, initializer_range=0.02, pool_scales=[1, 2, 3, 6], @@ -114,10 +118,14 @@ class UperNetConfig(PretrainedConfig): config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.initializer_range = initializer_range self.pool_scales = pool_scales diff --git a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py index 30ebe4fba6..2875e62dd4 100644 --- a/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/configuration_vit_hybrid.py @@ -51,6 +51,9 @@ class ViTHybridConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 768): Dimensionality of the encoder layers and the pooler layer. 
num_hidden_layers (`int`, *optional*, defaults to 12): @@ -104,6 +107,7 @@ class ViTHybridConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, @@ -137,6 +141,9 @@ class ViTHybridConfig(PretrainedConfig): "embedding_dynamic_padding": True, } + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + if isinstance(backbone_config, dict): if "model_type" in backbone_config: backbone_config_class = CONFIG_MAPPING[backbone_config["model_type"]] @@ -152,6 +159,7 @@ class ViTHybridConfig(PretrainedConfig): self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.hidden_size = hidden_size self.num_hidden_layers = num_hidden_layers self.num_attention_heads = num_attention_heads diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 4d2bcc612f..13f9942c9e 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -51,6 +51,9 @@ class VitMatteConfig(PretrainedConfig): use_timm_backbone (`bool`, *optional*, defaults to `False`): Whether to load `backbone` from the timm library. If `False`, the backbone is loaded from the transformers library. + backbone_kwargs (`dict`, *optional*): + Keyword arguments to be passed to AutoBackbone when loading from a checkpoint + e.g. `{'out_indices': (0, 1, 2, 3)}`. Cannot be specified if `backbone_config` is set. hidden_size (`int`, *optional*, defaults to 384): The number of input channels of the decoder. 
batch_norm_eps (`float`, *optional*, defaults to 1e-05): @@ -85,6 +88,7 @@ class VitMatteConfig(PretrainedConfig): backbone=None, use_pretrained_backbone=False, use_timm_backbone=False, + backbone_kwargs=None, hidden_size: int = 384, batch_norm_eps: float = 1e-5, initializer_range: float = 0.02, @@ -108,10 +112,14 @@ class VitMatteConfig(PretrainedConfig): config_class = CONFIG_MAPPING[backbone_model_type] backbone_config = config_class.from_dict(backbone_config) + if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") + self.backbone_config = backbone_config self.backbone = backbone self.use_pretrained_backbone = use_pretrained_backbone self.use_timm_backbone = use_timm_backbone + self.backbone_kwargs = backbone_kwargs self.batch_norm_eps = batch_norm_eps self.hidden_size = hidden_size self.initializer_range = initializer_range diff --git a/src/transformers/utils/backbone_utils.py b/src/transformers/utils/backbone_utils.py index 22c35c3f9b..14fcfe4a50 100644 --- a/src/transformers/utils/backbone_utils.py +++ b/src/transformers/utils/backbone_utils.py @@ -304,6 +304,12 @@ def load_backbone(config): use_timm_backbone = getattr(config, "use_timm_backbone", None) use_pretrained_backbone = getattr(config, "use_pretrained_backbone", None) backbone_checkpoint = getattr(config, "backbone", None) + backbone_kwargs = getattr(config, "backbone_kwargs", None) + + backbone_kwargs = {} if backbone_kwargs is None else backbone_kwargs + + if backbone_kwargs and backbone_config is not None: + raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.") # If there is a backbone_config and a backbone checkpoint, and use_pretrained_backbone=False then the desired # behaviour is ill-defined: do you want to load from the checkpoint's config or the backbone_config? 
@@ -317,7 +323,7 @@ def load_backbone(config): and backbone_checkpoint is None and backbone_checkpoint is None ): - return AutoBackbone.from_config(config=config) + return AutoBackbone.from_config(config=config, **backbone_kwargs) # config from the parent model that has a backbone if use_timm_backbone: @@ -326,16 +332,19 @@ def load_backbone(config): # Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone # to determine whether to load the pretrained weights. backbone = AutoBackbone.from_pretrained( - backbone_checkpoint, use_timm_backbone=use_timm_backbone, use_pretrained_backbone=use_pretrained_backbone + backbone_checkpoint, + use_timm_backbone=use_timm_backbone, + use_pretrained_backbone=use_pretrained_backbone, + **backbone_kwargs, ) elif use_pretrained_backbone: if backbone_checkpoint is None: raise ValueError("config.backbone must be set if use_pretrained_backbone is True") - backbone = AutoBackbone.from_pretrained(backbone_checkpoint) + backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **backbone_kwargs) else: if backbone_config is None and backbone_checkpoint is None: raise ValueError("Either config.backbone_config or config.backbone must be set") if backbone_config is None: - backbone_config = AutoConfig.from_pretrained(backbone_checkpoint) + backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **backbone_kwargs) backbone = AutoBackbone.from_config(config=backbone_config) return backbone diff --git a/tests/utils/test_backbone_utils.py b/tests/utils/test_backbone_utils.py index 0c3ff4866e..cd9a5a29a8 100644 --- a/tests/utils/test_backbone_utils.py +++ b/tests/utils/test_backbone_utils.py @@ -16,7 +16,7 @@ import unittest import pytest -from transformers import DetrConfig, MaskFormerConfig +from transformers import DetrConfig, MaskFormerConfig, ResNetBackbone, ResNetConfig, TimmBackbone from transformers.testing_utils import require_torch, slow from transformers.utils.backbone_utils 
import ( BackboneMixin, @@ -137,6 +137,65 @@ class BackboneUtilsTester(unittest.TestCase): self.assertEqual(backbone.out_features, ["a", "c"]) self.assertEqual(backbone.out_indices, [-3, -1]) + @slow + @require_torch + def test_load_backbone_from_config(self): + """ + Test that load_backbone correctly loads a backbone from a backbone config. + """ + config = MaskFormerConfig(backbone_config=ResNetConfig(out_indices=(0, 2))) + backbone = load_backbone(config) + self.assertEqual(backbone.out_features, ["stem", "stage2"]) + self.assertEqual(backbone.out_indices, (0, 2)) + self.assertIsInstance(backbone, ResNetBackbone) + + @slow + @require_torch + def test_load_backbone_from_checkpoint(self): + """ + Test that load_backbone correctly loads a backbone from a checkpoint. + """ + config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_config=None) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, [4]) + self.assertEqual(backbone.out_features, ["stage4"]) + self.assertIsInstance(backbone, ResNetBackbone) + + config = MaskFormerConfig( + backbone="resnet18", + use_timm_backbone=True, + ) + backbone = load_backbone(config) + # We can't know ahead of time the exact output features and indices, or the layer names before + # creating the timm model, so it defaults to the last layer (-1,) and has a different layer name + self.assertEqual(backbone.out_indices, (-1,)) + self.assertEqual(backbone.out_features, ["layer4"]) + self.assertIsInstance(backbone, TimmBackbone) + + @slow + @require_torch + def test_load_backbone_backbone_kwargs(self): + """ + Test that load_backbone correctly configures the loaded backbone with the provided kwargs. 
+ """ + config = MaskFormerConfig(backbone="resnet18", use_timm_backbone=True, backbone_kwargs={"out_indices": (0, 1)}) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, (0, 1)) + self.assertIsInstance(backbone, TimmBackbone) + + config = MaskFormerConfig(backbone="microsoft/resnet-18", backbone_kwargs={"out_indices": (0, 2)}) + backbone = load_backbone(config) + self.assertEqual(backbone.out_indices, (0, 2)) + self.assertIsInstance(backbone, ResNetBackbone) + + # Check can't be passed with a backbone config + with pytest.raises(ValueError): + config = MaskFormerConfig( + backbone="microsoft/resnet-18", + backbone_config=ResNetConfig(out_indices=(0, 2)), + backbone_kwargs={"out_indices": (0, 1)}, + ) + @slow @require_torch def test_load_backbone_in_new_model(self): diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index 10ba5d1872..da4a121035 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -224,6 +224,7 @@ def check_attribute_being_used(config_class, attributes, default_value, source_s "backbone", "backbone_config", "use_timm_backbone", + "backbone_kwargs", ] attributes_used_in_generation = ["encoder_no_repeat_ngram_size"]