From 9d31b32e9d7b6d939f42035d6b5ee91cc395ac9a Mon Sep 17 00:00:00 2001 From: Raushan Turganbay Date: Wed, 1 May 2024 12:32:45 +0500 Subject: [PATCH] Use text config's vocab size in testing models (#30568) use text config's vocab size --- tests/models/llava/test_modeling_llava.py | 166 ------------------ .../llava_next/test_modeling_llava_next.py | 166 ------------------ .../models/vipllava/test_modeling_vipllava.py | 166 ------------------ tests/test_modeling_common.py | 53 ++++-- 4 files changed, 42 insertions(+), 509 deletions(-) diff --git a/tests/models/llava/test_modeling_llava.py b/tests/models/llava/test_modeling_llava.py index b4c57e7ba0..0d798f8841 100644 --- a/tests/models/llava/test_modeling_llava.py +++ b/tests/models/llava/test_modeling_llava.py @@ -14,7 +14,6 @@ # limitations under the License. """Testing suite for the PyTorch Llava model.""" -import copy import gc import unittest @@ -192,171 +191,6 @@ class LlavaForConditionalGenerationModelTest(ModelTesterMixin, unittest.TestCase def test_training_gradient_checkpointing_use_reentrant_false(self): pass - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size - def test_resize_tokens_embeddings(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - if self.model_tester.is_training is False: - model.eval() - - model_vocab_size = config.text_config.vocab_size - # Retrieve the embeddings and clone theme - model_embed = model.resize_token_embeddings(model_vocab_size) - cloned_embeddings = model_embed.weight.clone() - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15) - - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - - # make sure that decoder_input_ids are resized as well - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that adding and removing tokens has not modified the first part of the embedding matrix. - models_equal = True - for p1, p2 in zip(cloned_embeddings, model_embed.weight): - if p1.data.ne(p2.data).sum() > 0: - models_equal = False - - self.assertTrue(models_equal) - - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1) - self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size) - self.assertTrue(model.config.text_config.vocab_size, model.vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size - target_dimension = 128 - model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0], target_dimension) - - with self.assertRaisesRegex( - ValueError, - "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer", - ): - model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size - def test_resize_embeddings_untied(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - original_config.tie_word_embeddings = False - - # if model cannot untied embeddings -> leave test - if original_config.tie_word_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config).to(torch_device) - - # if no output embeddings -> leave test - if model.get_output_embeddings() is None: - continue - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size - def test_tie_model_weights(self): - if not self.test_torchscript: - return - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - def check_same_values(layer_1, layer_2): - equal = True - for p1, p2 in zip(layer_1.weight, layer_2.weight): - if p1.data.ne(p2.data).sum() > 0: - equal = False - return equal - - for model_class in self.all_model_classes: - config.torchscript = True - model_not_tied = model_class(config) - if model_not_tied.get_output_embeddings() is None: - continue - - config_tied = copy.deepcopy(config) - config_tied.torchscript = False - model_tied = model_class(config_tied) - params_tied = list(model_tied.parameters()) - # Check that the embedding layer and decoding layer are the same in size and in value - # self.assertTrue(check_same_values(embeddings, decoding)) - - # Check that after resize they remain tied. - model_tied.resize_token_embeddings(config.text_config.vocab_size + 10) - params_tied_2 = list(model_tied.parameters()) - self.assertEqual(len(params_tied_2), len(params_tied)) - @require_torch class LlavaForConditionalGenerationIntegrationTest(unittest.TestCase): diff --git a/tests/models/llava_next/test_modeling_llava_next.py b/tests/models/llava_next/test_modeling_llava_next.py index 3656bb6505..78256be58c 100644 --- a/tests/models/llava_next/test_modeling_llava_next.py +++ b/tests/models/llava_next/test_modeling_llava_next.py @@ -14,7 +14,6 @@ # limitations under the License. """ Testing suite for the PyTorch Llava-NeXT model. """ -import copy import gc import unittest @@ -215,171 +214,6 @@ class LlavaNextForConditionalGenerationModelTest(ModelTesterMixin, GenerationTes def test_cpu_offload(self): pass - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size - def test_resize_tokens_embeddings(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - if self.model_tester.is_training is False: - model.eval() - - model_vocab_size = config.text_config.vocab_size - # Retrieve the embeddings and clone theme - model_embed = model.resize_token_embeddings(model_vocab_size) - cloned_embeddings = model_embed.weight.clone() - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15) - - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - - # make sure that decoder_input_ids are resized as well - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that adding and removing tokens has not modified the first part of the embedding matrix. - models_equal = True - for p1, p2 in zip(cloned_embeddings, model_embed.weight): - if p1.data.ne(p2.data).sum() > 0: - models_equal = False - - self.assertTrue(models_equal) - - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1) - self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size) - self.assertTrue(model.config.text_config.vocab_size, model.vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size - target_dimension = 128 - model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0], target_dimension) - - with self.assertRaisesRegex( - ValueError, - "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer", - ): - model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size - def test_resize_embeddings_untied(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - original_config.tie_word_embeddings = False - - # if model cannot untied embeddings -> leave test - if original_config.tie_word_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config).to(torch_device) - - # if no output embeddings -> leave test - if model.get_output_embeddings() is None: - continue - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size - def test_tie_model_weights(self): - if not self.test_torchscript: - return - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - def check_same_values(layer_1, layer_2): - equal = True - for p1, p2 in zip(layer_1.weight, layer_2.weight): - if p1.data.ne(p2.data).sum() > 0: - equal = False - return equal - - for model_class in self.all_model_classes: - config.torchscript = True - model_not_tied = model_class(config) - if model_not_tied.get_output_embeddings() is None: - continue - - config_tied = copy.deepcopy(config) - config_tied.torchscript = False - model_tied = model_class(config_tied) - params_tied = list(model_tied.parameters()) - # Check that the embedding layer and decoding layer are the same in size and in value - # self.assertTrue(check_same_values(embeddings, decoding)) - - # Check that after resize they remain tied. - model_tied.resize_token_embeddings(config.text_config.vocab_size + 10) - params_tied_2 = list(model_tied.parameters()) - self.assertEqual(len(params_tied_2), len(params_tied)) - @require_torch class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase): diff --git a/tests/models/vipllava/test_modeling_vipllava.py b/tests/models/vipllava/test_modeling_vipllava.py index e783b34700..ff84f71784 100644 --- a/tests/models/vipllava/test_modeling_vipllava.py +++ b/tests/models/vipllava/test_modeling_vipllava.py @@ -14,7 +14,6 @@ # limitations under the License. """ Testing suite for the PyTorch VipLlava model. """ -import copy import gc import unittest @@ -186,171 +185,6 @@ class VipLlavaForConditionalGenerationModelTest(ModelTesterMixin, unittest.TestC def test_training_gradient_checkpointing_use_reentrant_false(self): pass - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size - def test_resize_tokens_embeddings(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - if self.model_tester.is_training is False: - model.eval() - - model_vocab_size = config.text_config.vocab_size - # Retrieve the embeddings and clone theme - model_embed = model.resize_token_embeddings(model_vocab_size) - cloned_embeddings = model_embed.weight.clone() - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model_embed = model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15) - - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - - # make sure that decoder_input_ids are resized as well - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that adding and removing tokens has not modified the first part of the embedding matrix. - models_equal = True - for p1, p2 in zip(cloned_embeddings, model_embed.weight): - if p1.data.ne(p2.data).sum() > 0: - models_equal = False - - self.assertTrue(models_equal) - - config = copy.deepcopy(original_config) - model = model_class(config) - model.to(torch_device) - - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1) - self.assertTrue(model.config.text_config.vocab_size + 10, model_vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - self.assertTrue(model_embed.weight.shape[0], model.config.text_config.vocab_size) - self.assertTrue(model.config.text_config.vocab_size, model.vocab_size) - - model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0] // 64, 0) - - # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size - target_dimension = 128 - model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64) - self.assertTrue(model_embed.weight.shape[0], target_dimension) - - with self.assertRaisesRegex( - ValueError, - "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer", - ): - model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size - def test_resize_embeddings_untied(self): - ( - original_config, - inputs_dict, - ) = self.model_tester.prepare_config_and_inputs_for_common() - if not self.test_resize_embeddings: - return - - original_config.tie_word_embeddings = False - - # if model cannot untied embeddings -> leave test - if original_config.tie_word_embeddings: - return - - for model_class in self.all_model_classes: - config = copy.deepcopy(original_config) - model = model_class(config).to(torch_device) - - # if no output embeddings -> leave test - if model.get_output_embeddings() is None: - continue - - # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_vocab_size = config.text_config.vocab_size - model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10) - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size + 10) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size - model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15) - # Check that it actually resizes the embeddings matrix - output_embeds = model.get_output_embeddings() - self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15) - # Check bias if present - if output_embeds.bias is not None: - self.assertEqual(output_embeds.bias.shape[0], model_vocab_size - 15) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - # Input ids should be clamped to the maximum size of the vocabulary - inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1) - if "decoder_input_ids" in inputs_dict: - inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1) - # Check that the model can still do a forward pass successfully (every parameter should be resized) - model(**self._prepare_for_class(inputs_dict, model_class)) - - # Copied from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size - def test_tie_model_weights(self): - if not self.test_torchscript: - return - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - def check_same_values(layer_1, layer_2): - equal = True - for p1, p2 in zip(layer_1.weight, layer_2.weight): - if p1.data.ne(p2.data).sum() > 0: - equal = False - return equal - - for model_class in self.all_model_classes: - config.torchscript = True - model_not_tied = model_class(config) - if model_not_tied.get_output_embeddings() is None: - continue - - config_tied = copy.deepcopy(config) - config_tied.torchscript = False - model_tied = model_class(config_tied) - params_tied = list(model_tied.parameters()) - # Check that the embedding layer and decoding layer are the same in size and in value - # self.assertTrue(check_same_values(embeddings, decoding)) - - # Check that after resize they remain tied. - model_tied.resize_token_embeddings(config.text_config.vocab_size + 10) - params_tied_2 = list(model_tied.parameters()) - self.assertEqual(len(params_tied_2), len(params_tied)) - @require_torch class VipLlavaForConditionalGenerationIntegrationTest(unittest.TestCase): diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 061c0000ce..be68ec4217 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -1762,14 +1762,19 @@ class ModelTesterMixin: if self.model_tester.is_training is False: model.eval() - model_vocab_size = config.vocab_size + model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size # Retrieve the embeddings and clone theme model_embed = model.resize_token_embeddings(model_vocab_size) cloned_embeddings = model_embed.weight.clone() # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size model_embed = model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.vocab_size, model_vocab_size + 10) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) + self.assertEqual(new_model_vocab_size, model_vocab_size + 10) # Check that it actually resizes the embeddings matrix self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10) # Check that the model can still do a forward pass successfully (every parameter should be resized) @@ -1777,7 +1782,12 @@ class ModelTesterMixin: # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size model_embed = model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.vocab_size, model_vocab_size - 15) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) + self.assertEqual(new_model_vocab_size, model_vocab_size - 15) # Check that it actually resizes the embeddings matrix self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15) @@ -1802,15 +1812,25 @@ class ModelTesterMixin: model = model_class(config) model.to(torch_device) - model_vocab_size = config.vocab_size + model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1) - self.assertTrue(model.config.vocab_size + 10, model_vocab_size) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) + self.assertTrue(new_model_vocab_size + 10, model_vocab_size) model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) self.assertTrue(model_embed.weight.shape[0] // 64, 0) - self.assertTrue(model_embed.weight.shape[0], model.config.vocab_size) - self.assertTrue(model.config.vocab_size, model.vocab_size) + self.assertTrue(model_embed.weight.shape[0], new_model_vocab_size) + self.assertTrue(new_model_vocab_size, model.vocab_size) model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64) self.assertTrue(model_embed.weight.shape[0] // 64, 0) @@ -1849,9 +1869,14 @@ class ModelTesterMixin: continue # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size - model_vocab_size = config.vocab_size + model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size model.resize_token_embeddings(model_vocab_size + 10) - self.assertEqual(model.config.vocab_size, model_vocab_size + 10) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) + self.assertEqual(new_model_vocab_size, model_vocab_size + 10) output_embeds = model.get_output_embeddings() self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10) # Check bias if present @@ -1862,7 +1887,12 @@ class ModelTesterMixin: # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size model.resize_token_embeddings(model_vocab_size - 15) - self.assertEqual(model.config.vocab_size, model_vocab_size - 15) + new_model_vocab_size = ( + model.config.text_config.vocab_size + if hasattr(model.config, "text_config") + else model.config.vocab_size + ) + self.assertEqual(new_model_vocab_size, model_vocab_size - 15) # Check that it actually resizes the embeddings matrix output_embeds = model.get_output_embeddings() self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15) @@ -1949,7 +1979,8 @@ class ModelTesterMixin: # self.assertTrue(check_same_values(embeddings, decoding)) # Check that after resize they remain tied. - model_tied.resize_token_embeddings(config.vocab_size + 10) + vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size + model_tied.resize_token_embeddings(vocab_size + 10) params_tied_2 = list(model_tied.parameters()) self.assertEqual(len(params_tied_2), len(params_tied))