Remove low_cpu_mem_usage and _fast_init (#36963)

* Remove low_cpu_mem_usage and _fast_init

* Update deepspeed.py

* Update modeling_utils.py

* remove the first 2 tests everywhere

* Update test_modeling_common.py

* remove what was remaining about fast_init

* fix logic and simplify

* mismatched keys logic update

* Update modeling_utils.py

* Update modeling_utils.py

* Update modeling_utils.py

* Update modeling_utils.py

* fix 2 models init_weights

* extend to others

* remove grad

* Update modeling_fsmt.py

* init weights in tests

* style

* Update test_modeling_fsmt.py

* more old models

* fix more init_weights

* copies

* fix

* style

* Update modeling_lxmert.py

* fix inits

* more and more

* more

* should finalize

* style

* Update modeling_dinov2_with_registers.py

* fix

* Update modeling_encoder_decoder.py

* fix

* style

* Update modeling_lxmert.py

* post rebase cleanup

* Update modeling_informer.py

* back to start for device

* fix

* add test to detect all failing cases correctly

* Update test_modeling_common.py

* fix

* fix

* sam

* style

* Update modeling_maskformer_swin.py

* CIs

* CIs

* remove test - will add it on separate PR

* fix

* fix

* Update modeling_sam.py

* CIs

* CIs

* CIs

* convnext

* suggestions

* CIs

* fix copies after merge

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
Cyril Vallez
2025-03-31 17:18:43 +02:00
committed by GitHub
parent 8805600406
commit f304318f5f
128 changed files with 464 additions and 1165 deletions

View File

@@ -14,11 +14,8 @@
# limitations under the License.
"""Testing suite for the PyTorch BridgeTower model."""
import tempfile
import unittest
import numpy as np
from transformers import (
BridgeTowerConfig,
BridgeTowerTextConfig,
@@ -359,39 +356,6 @@ class BridgeTowerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
model = BridgeTowerModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@slow
def test_save_load_fast_init_from_base(self):
# Override only to attach the `@slow` marker: the inherited test is slow on this model,
# so the body simply delegates to the parent implementation without changing it.
super().test_save_load_fast_init_from_base()
# Override: BridgeTower needs its own way of picking a comparable tensor out of the model outputs
def test_save_load(self):
    """Verify that a `save_pretrained` / `from_pretrained` round trip leaves the outputs unchanged.

    For every class in `self.all_model_classes`, the model is run once, saved to a
    temporary directory, reloaded, and run again on the same inputs. The tensors
    selected by `self.extract_output` must agree within an absolute tolerance of 1e-5.
    """
    config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()

    def _comparable_array(model_outputs, class_name):
        # Select the tensor to compare, move it to NumPy and zero out NaNs so that
        # the max-abs difference below stays well defined.
        array = self.extract_output(model_outputs, class_name)
        array = array.cpu().numpy()
        array[np.isnan(array)] = 0
        return array

    for model_class in self.all_model_classes:
        class_name = model_class.__name__

        # Reference forward pass with the freshly initialised model
        model = model_class(config)
        model.to(torch_device)
        model.eval()
        with torch.no_grad():
            outputs = model(**input_dict)
        expected = _comparable_array(outputs, class_name)

        with tempfile.TemporaryDirectory() as tmpdirname:
            # Round trip through disk, then repeat the forward pass on the reloaded model
            model.save_pretrained(tmpdirname)
            model = model_class.from_pretrained(tmpdirname)
            model.to(torch_device)
            with torch.no_grad():
                after_outputs = model(**input_dict)

            # Make sure we don't have nans
            reloaded = _comparable_array(after_outputs, class_name)
            max_diff = np.amax(np.abs(reloaded - expected))
            self.assertLessEqual(max_diff, 1e-5)
# Override this as `hidden states output` is different for BridgeTower
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):