Add tie_weights() to LM heads and set bias in set_output_embeddings() (#28948)

* Add tie_weights() to LM heads and set bias in set_output_embeddings()

The biases were not tied correctly in some LM heads; this change fixes that.

* Moving test_save_and_load_low_cpu_mem_usage to ModelTesterMixin

* Adding _tie_weights() to MPNet and Vilt

* Skip test for low cpu mem usage for Deta/DeformableDetr since they cannot init on meta device

* Rename the test to use save_load in its name to match the convention
This commit is contained in:
JB (Don)
2024-02-15 04:39:01 +08:00
committed by GitHub
parent 3f4e79d29c
commit 725f4ad1cc
20 changed files with 104 additions and 0 deletions

View File

@@ -435,6 +435,23 @@ class ModelTesterMixin:
max_diff = (model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item()
self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical")
def test_save_load_low_cpu_mem_usage(self):
    # Round-trip every model class through save_pretrained / from_pretrained with
    # low_cpu_mem_usage=True, then move the reloaded model to torch_device.
    # A single temporary directory is shared by all model classes in the loop.
    with tempfile.TemporaryDirectory() as save_dir:
        for model_class in self.all_model_classes:
            # Only the config is needed here; the prepared inputs are discarded.
            config, _ = self.model_tester.prepare_config_and_inputs_for_common()

            saved_model = model_class(config)
            saved_model.save_pretrained(save_dir)

            reloaded_model = model_class.from_pretrained(save_dir, low_cpu_mem_usage=True)

            # With low_cpu_mem_usage=True the parameters start out on device=meta.
            # Any parameter that was left unloaded or untied makes the move to
            # torch_device raise, which is the failure this test is looking for.
            reloaded_model.to(torch_device)
def test_fast_init_context_manager(self):
# 1. Create a dummy class. Should have buffers as well? To make sure we test __init__
class MyClass(PreTrainedModel):