[roberta] fix lm_head.decoder.weight ignore_key handling (#12446)

* fix lm_head.decoder.weight ignore_key handling

* fix the mutable class variable

* Update src/transformers/models/roberta/modeling_roberta.py

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

* replicate the comment

* make deterministic

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
Stas Bekman
2021-07-01 10:31:19 -07:00
committed by GitHub
parent 7f0027db30
commit 2d1d92181a
4 changed files with 48 additions and 8 deletions

View File

@@ -164,7 +164,7 @@ class ModelTesterMixin:
max_diff = np.amax(np.abs(out_1 - out_2))
self.assertLessEqual(max_diff, 1e-5)
def test_save_load__keys_to_ignore_on_save(self):
def test_save_load_keys_to_ignore_on_save(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
@@ -175,7 +175,7 @@ class ModelTesterMixin:
# check the keys are in the original state_dict
for k in _keys_to_ignore_on_save:
self.assertIn(k, model.state_dict())
self.assertIn(k, model.state_dict().keys(), "\n".join(model.state_dict().keys()))
# check that certain keys didn't get saved with the model
with tempfile.TemporaryDirectory() as tmpdirname:
@@ -183,7 +183,7 @@ class ModelTesterMixin:
output_model_file = os.path.join(tmpdirname, WEIGHTS_NAME)
state_dict_saved = torch.load(output_model_file)
for k in _keys_to_ignore_on_save:
self.assertNotIn(k, state_dict_saved)
self.assertNotIn(k, state_dict_saved.keys(), "\n".join(state_dict_saved.keys()))
# Test we can load the state dict in the model, necessary for the checkpointing API in Trainer.
load_result = model.load_state_dict(state_dict_saved, strict=False)

View File

@@ -15,9 +15,10 @@
import unittest
from copy import deepcopy
from transformers import is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device
from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester
from .test_generation_utils import GenerationTesterMixin
@@ -43,6 +44,8 @@ if is_torch_available():
create_position_ids_from_input_ids,
)
ROBERTA_TINY = "sshleifer/tiny-distilroberta-base"
class RobertaModelTester:
def __init__(
@@ -475,7 +478,7 @@ class RobertaModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCas
@require_torch
class RobertaModelIntegrationTest(unittest.TestCase):
class RobertaModelIntegrationTest(TestCasePlus):
@slow
def test_inference_masked_lm(self):
model = RobertaForMaskedLM.from_pretrained("roberta-base")
@@ -527,3 +530,23 @@ class RobertaModelIntegrationTest(unittest.TestCase):
# expected_tensor = roberta.predict("mnli", input_ids, return_logits=True).detach()
self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-4))
# XXX: this might be a candidate for common tests if we have many of those
def test_lm_head_ignore_keys(self):
keys_to_ignore_on_save_tied = [r"lm_head.decoder.weight", r"lm_head.decoder.bias"]
keys_to_ignore_on_save_untied = [r"lm_head.decoder.bias"]
config = RobertaConfig.from_pretrained(ROBERTA_TINY)
config_tied = deepcopy(config)
config_tied.tie_word_embeddings = True
config_untied = deepcopy(config)
config_untied.tie_word_embeddings = False
for cls in [RobertaForMaskedLM, RobertaForCausalLM]:
model = cls(config_tied)
self.assertEqual(model._keys_to_ignore_on_save, keys_to_ignore_on_save_tied, cls)
# the keys should be different when embeddings aren't tied
model = cls(config_untied)
self.assertEqual(model._keys_to_ignore_on_save, keys_to_ignore_on_save_untied, cls)
# test that saving works with updated ignore keys - just testing that it doesn't fail
model.save_pretrained(self.get_auto_remove_tmp_dir())