Fix missing initializations for models created in 2023 (#39239)
* fix SwiftFormer
* fix Kosmos2
* fix Owlv2
* fix Sam
* fix Vits
* fix Pvt
* fix MobileViTV2
* fix PatchTST
* fix Bros
* fix Informer
* fix BridgeTower
* fix Mra and Yoso
* fix Rwkv
* fix EfficientNet
* fix NllbMoe
* fix Tvp
* fix Clap
* fix Autoformer
* fix SwiftFormer
* fix Mgpstr
* fix Align
* fix VitMatte
* fix SpeechT5
* add conditional check for parameters
* fix SpeechT5
* fix TimmBackbone and Clvp
* fix SwiftFormer
* fix SeamlessM4T and SeamlessM4Tv2
* fix Align
* fix Owlv2 and OwlViT
* add reviewed changes
* add reviewed changes
* fix typo

---------

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
This commit is contained in:
@@ -536,7 +536,7 @@ class ClapModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
for name, param in model.named_parameters():
|
||||
if param.requires_grad:
|
||||
# check if `logit_scale` is initialized as per the original implementation
|
||||
if name == "logit_scale":
|
||||
if "logit_scale" in name:
|
||||
self.assertAlmostEqual(
|
||||
param.data.item(),
|
||||
np.log(1 / 0.07),
|
||||
|
||||
@@ -13,10 +13,9 @@
|
||||
# limitations under the License.
|
||||
"""Testing suite for the PyTorch SwiftFormer model."""
|
||||
|
||||
import copy
|
||||
import unittest
|
||||
|
||||
from transformers import PretrainedConfig, SwiftFormerConfig
|
||||
from transformers import SwiftFormerConfig
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_vision,
|
||||
@@ -26,7 +25,7 @@ from transformers.testing_utils import (
|
||||
from transformers.utils import cached_property, is_torch_available, is_vision_available
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
|
||||
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
|
||||
from ...test_pipeline_mixin import PipelineTesterMixin
|
||||
|
||||
|
||||
@@ -234,16 +233,6 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
def test_initialization(self):
|
||||
def _config_zero_init(config):
|
||||
configs_no_init = copy.deepcopy(config)
|
||||
for key in configs_no_init.__dict__.keys():
|
||||
if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key:
|
||||
setattr(configs_no_init, key, 1e-10)
|
||||
if isinstance(getattr(configs_no_init, key, None), PretrainedConfig):
|
||||
no_init_subconfig = _config_zero_init(getattr(configs_no_init, key))
|
||||
setattr(configs_no_init, key, no_init_subconfig)
|
||||
return configs_no_init
|
||||
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
configs_no_init = _config_zero_init(config)
|
||||
|
||||
@@ -136,6 +136,10 @@ class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTeste
|
||||
def test_hidden_states_output(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="TimmBackbone initialization is managed on the timm side")
|
||||
def test_can_init_all_missing_weights(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="TimmBackbone initialization is managed on the timm side")
|
||||
def test_initialization(self):
|
||||
pass
|
||||
|
||||
@@ -854,9 +854,8 @@ class ModelTesterMixin:
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
# For now, skip everything older than 2024 and "important models" (too many models to patch otherwise)
|
||||
# Use `supports_cache_class` as a proxy to judge "important" models in order to prioritize them
|
||||
# TODO: relax this as we patch more and more models
|
||||
if addition_year < 2024:
|
||||
if addition_year < 2023:
|
||||
self.skipTest(reason=f"{model_class} is not a priorited model for now.")
|
||||
|
||||
# Monkey patch the method to add a seed (we do it on PreTrainedModel._initialize_weights, which wraps
|
||||
|
||||
Reference in New Issue
Block a user