Fix failing tests for MobileBERT token embedding resizing (#33950)

* Fix failed tests with MobileBERT
* Cast to the correct dtype
* Code fixup
* Fix padding_idx larger than embedding_size
* Reduce covariance more: use 1e-7 instead of 1e-5
* Comment fix
* Reduce covariance more: use 1e-9 instead of 1e-7
* Copy new config
* all but MRA fixed
* fix mra
* very flaky
* skip instead
* make fixup

---------

Co-authored-by: Joao Gante <joao@huggingface.co>
2024-10-09 13:23:50 +03:00
parent faa0f63b93
commit cdee5285ca
6 changed files with 42 additions and 18 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -1857,7 +1857,8 @@ class ModelTesterMixin:
            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            if not is_deepspeed_zero3_enabled():
                # A distriputed launcher is needed for the forward pass when deepspeed is enabled
-                model(**self._prepare_for_class(inputs_dict, model_class))
+                model_inputs = self._prepare_for_class(inputs_dict, model_class)
+                model(**model_inputs)

            # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
            model_embed = model.resize_token_embeddings(model_vocab_size - 15)
@@ -1875,7 +1876,8 @@ class ModelTesterMixin:
                # A distriputed launcher is needed for the forward pass when deepspeed is enabled
                if "decoder_input_ids" in inputs_dict:
                    inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
-                model(**self._prepare_for_class(inputs_dict, model_class))
+                model_inputs = self._prepare_for_class(inputs_dict, model_class)
+                model(**model_inputs)

            # Check that adding and removing tokens has not modified the first part of the embedding matrix.
            models_equal = True
@@ -1886,6 +1888,9 @@ class ModelTesterMixin:
            self.assertTrue(models_equal)

            del model
+            del config
+            # Copy again. config changed with embedding resizing (`vocab_size` changed)
+            config = copy.deepcopy(original_config)
            if is_deepspeed_zero3_enabled():
                with deepspeed.zero.Init():
                    model = model_class(config)
@@ -1921,7 +1926,11 @@ class ModelTesterMixin:

            # Test when `vocab_size` is smaller than `hidden_size`.
            del model
+            del config
+            # Copy again. config changed with embedding resizing (`vocab_size` changed)
+            config = copy.deepcopy(original_config)
            config.vocab_size = 4
+            config.pad_token_id = 3
            if is_deepspeed_zero3_enabled():
                with deepspeed.zero.Init():
                    model = model_class(config)
@@ -2026,7 +2035,7 @@ class ModelTesterMixin:
                old_embeddings_mean = torch.mean(output_embeds.weight.data[:-10, :], axis=0)
                new_embeddings_mean = torch.mean(output_embeds.weight.data[-10:, :], axis=0)
            torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, atol=1e-3, rtol=1e-1)
-            # check if the bias is always initialized with zero.
+            # check that the mean of the old bias is close to the mean of the added bias.
            if output_embeds.bias is not None:
                if is_deepspeed_zero3_enabled():
                    with deepspeed.zero.GatheredParameters(output_embeds.bias, modifier_rank=None):