Add option to load a pretrained model with mismatched shapes (#12664)

* Add option to load a pretrained model with mismatched shapes

* Fail at loading when mismatched shapes in Flax

* Fix tests

* Update src/transformers/modeling_flax_utils.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* Address review comments

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Sylvain Gugger
2021-07-13 10:15:15 -04:00
committed by GitHub
parent 7f6d375029
commit 90178b0cef
9 changed files with 228 additions and 67 deletions

View File

@@ -24,17 +24,19 @@ import numpy as np
import transformers
from huggingface_hub import HfApi
from requests.exceptions import HTTPError
from transformers import BertConfig, FlaxBertModel, is_flax_available, is_torch_available
from transformers import BertConfig, is_flax_available, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import (
ENDPOINT_STAGING,
PASS,
USER,
CaptureLogger,
is_pt_flax_cross_test,
is_staging_test,
require_flax,
slow,
)
from transformers.utils import logging
if is_flax_available():
@@ -45,7 +47,13 @@ if is_flax_available():
import jaxlib.xla_extension as jax_xla
from flax.core.frozen_dict import unfreeze
from flax.traverse_util import flatten_dict
from transformers import FLAX_MODEL_FOR_QUESTION_ANSWERING_MAPPING, FLAX_MODEL_MAPPING
from transformers import (
FLAX_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
FLAX_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
FLAX_MODEL_MAPPING,
FlaxAutoModelForSequenceClassification,
FlaxBertModel,
)
from transformers.modeling_flax_pytorch_utils import (
convert_pytorch_state_dict_to_flax,
load_flax_weights_in_pytorch_model,
@@ -516,6 +524,32 @@ class FlaxModelTesterMixin:
[self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
)
def test_load_with_mismatched_shapes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
if model_class not in get_values(FLAX_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):
continue
with self.subTest(msg=f"Testing {model_class}"):
with tempfile.TemporaryDirectory() as tmp_dir:
model = model_class(config)
model.save_pretrained(tmp_dir)
# Fails when we don't set ignore_mismatched_sizes=True
with self.assertRaises(ValueError):
new_model = FlaxAutoModelForSequenceClassification.from_pretrained(tmp_dir, num_labels=42)
logger = logging.get_logger("transformers.modeling_flax_utils")
with CaptureLogger(logger) as cl:
new_model = FlaxAutoModelForSequenceClassification.from_pretrained(
tmp_dir, num_labels=42, ignore_mismatched_sizes=True
)
self.assertIn("the shapes did not match", cl.out)
logits = new_model(**inputs_dict)["logits"]
self.assertEqual(logits.shape[1], 42)
@require_flax
@is_staging_test