Use text config's vocab size in testing models (#30568)
use text config's vocab size
This commit is contained in:
committed by
GitHub
parent
78fdd64dcf
commit
9d31b32e9d
@@ -14,7 +14,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Testing suite for the PyTorch Llava model."""
|
"""Testing suite for the PyTorch Llava model."""
|
||||||
|
|
||||||
import copy
|
|
||||||
import gc
|
import gc
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
@@ -192,171 +191,6 @@ class LlavaForConditionalGenerationModelTest(ModelTesterMixin, unittest.TestCase
|
|||||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size.
# The `pad_to_multiple_of` assertions are stricter than in the common test, so this is no longer a verbatim copy.
def test_resize_tokens_embeddings(self):
    """Check that `resize_token_embeddings` grows, shrinks and pads the token embedding matrix.

    For every class in `self.all_model_classes` the test verifies that:

    - `config.text_config.vocab_size` and the returned embedding matrix follow the requested size,
    - a forward pass still runs after each resize,
    - the rows compared after growing and shrinking are unchanged,
    - with `pad_to_multiple_of=64` the matrix size is a multiple of 64, and a non-integer
      `pad_to_multiple_of` raises `ValueError`.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy.
    """
    (
        original_config,
        inputs_dict,
    ) = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        if self.model_tester.is_training is False:
            model.eval()

        model_vocab_size = config.text_config.vocab_size
        # Retrieve the embeddings and clone them
        model_embed = model.resize_token_embeddings(model_vocab_size)
        cloned_embeddings = model_embed.weight.clone()

        # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size + 10)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size - 15)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)

        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        # Input ids should be clamped to the maximum size of the vocabulary.
        # `clamp_` works in place on the shared `inputs_dict`, so later model classes see the clamped ids too.
        inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)

        # make sure that decoder_input_ids are resized as well
        if "decoder_input_ids" in inputs_dict:
            inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that adding and removing tokens has not modified the first part of the embedding matrix.
        # `zip` stops at the shorter (shrunk) matrix, so only the rows present in both are compared.
        models_equal = True
        for p1, p2 in zip(cloned_embeddings, model_embed.weight):
            if p1.data.ne(p2.data).sum() > 0:
                models_equal = False

        self.assertTrue(models_equal)

        # Start again from a fresh model for the `pad_to_multiple_of` checks.
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        model_vocab_size = config.text_config.vocab_size
        model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
        # NOTE: the checks below used `assertTrue(a, b)`, where `b` is only the failure message, so they
        # could never fail. They are now real comparisons.
        # NOTE(review): they assume `resize_token_embeddings` keeps `config.text_config.vocab_size` and
        # `model.vocab_size` equal to the (padded) matrix size -- confirm against the model implementation.
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)

        model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        self.assertEqual(model_embed.weight.shape[0], model.config.text_config.vocab_size)
        self.assertEqual(model.config.text_config.vocab_size, model.vocab_size)

        model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
        target_dimension = 128
        model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0], target_dimension)

        with self.assertRaisesRegex(
            ValueError,
            "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
        ):
            model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
# (same checks, restructured as a loop, so this is no longer a verbatim copy).
def test_resize_embeddings_untied(self):
    """Check that resizing token embeddings also resizes the output embeddings when weights are not tied.

    For each class in `self.all_model_classes` that exposes output embeddings, the vocabulary is first
    grown by 10 and then shrunk by 15 relative to the configured size. After each resize the test checks
    `config.text_config.vocab_size`, the first dimension of the output embedding weight (and bias, if
    any), and that a forward pass still runs.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy or when
    `tie_word_embeddings` still reads as truthy after being set to `False`.
    """
    original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    original_config.tie_word_embeddings = False

    # if model cannot untied embeddings -> leave test
    if original_config.tie_word_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config).to(torch_device)

        # if no output embeddings -> leave test
        if model.get_output_embeddings() is None:
            continue

        base_vocab_size = config.text_config.vocab_size

        # First grow the vocabulary, then shrink it below its configured size.
        for delta in (10, -15):
            resized_vocab_size = base_vocab_size + delta
            model.resize_token_embeddings(resized_vocab_size)
            self.assertEqual(model.config.text_config.vocab_size, resized_vocab_size)

            # Check that it actually resizes the output embeddings matrix
            out_embed = model.get_output_embeddings()
            self.assertEqual(out_embed.weight.shape[0], resized_vocab_size)
            # Check bias if present
            if out_embed.bias is not None:
                self.assertEqual(out_embed.bias.shape[0], resized_vocab_size)

            if delta < 0:
                # Input ids should be clamped to the maximum size of the vocabulary.
                # `clamp_` mutates the shared `inputs_dict` in place.
                highest_token_id = resized_vocab_size - 1
                inputs_dict["input_ids"].clamp_(max=highest_token_id)
                if "decoder_input_ids" in inputs_dict:
                    inputs_dict["decoder_input_ids"].clamp_(max=highest_token_id)

            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
# (dead code removed, so this is no longer a verbatim copy).
def test_tie_model_weights(self):
    """Check that resizing the token embeddings does not change the number of model parameters.

    For each class in `self.all_model_classes` that exposes output embeddings, a model is built with
    `torchscript=False`, its embeddings are grown by 10 tokens, and the length of
    `list(model.parameters())` is compared before and after.

    Only the parameter count is compared; the values of the input and output embeddings are not.

    Returns early (test passes) when `self.test_torchscript` is falsy.
    """
    if not self.test_torchscript:
        return

    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        # The `torchscript=True` model is only used to find out whether the class has output embeddings.
        # NOTE(review): presumably `torchscript=True` yields untied weights (hence the name) -- confirm.
        config.torchscript = True
        model_not_tied = model_class(config)
        if model_not_tied.get_output_embeddings() is None:
            continue

        config_tied = copy.deepcopy(config)
        config_tied.torchscript = False
        model_tied = model_class(config_tied)
        params_tied = list(model_tied.parameters())

        # Check that after resize they remain tied.
        model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
        params_tied_2 = list(model_tied.parameters())
        self.assertEqual(len(params_tied_2), len(params_tied))
|
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
class LlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
class LlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
""" Testing suite for the PyTorch Llava-NeXT model. """
|
""" Testing suite for the PyTorch Llava-NeXT model. """
|
||||||
|
|
||||||
import copy
|
|
||||||
import gc
|
import gc
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
@@ -215,171 +214,6 @@ class LlavaNextForConditionalGenerationModelTest(ModelTesterMixin, GenerationTes
|
|||||||
def test_cpu_offload(self):
|
def test_cpu_offload(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size.
# The `pad_to_multiple_of` assertions are stricter than in the common test, so this is no longer a verbatim copy.
def test_resize_tokens_embeddings(self):
    """Check that `resize_token_embeddings` grows, shrinks and pads the token embedding matrix.

    For every class in `self.all_model_classes` the test verifies that:

    - `config.text_config.vocab_size` and the returned embedding matrix follow the requested size,
    - a forward pass still runs after each resize,
    - the rows compared after growing and shrinking are unchanged,
    - with `pad_to_multiple_of=64` the matrix size is a multiple of 64, and a non-integer
      `pad_to_multiple_of` raises `ValueError`.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy.
    """
    (
        original_config,
        inputs_dict,
    ) = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        if self.model_tester.is_training is False:
            model.eval()

        model_vocab_size = config.text_config.vocab_size
        # Retrieve the embeddings and clone them
        model_embed = model.resize_token_embeddings(model_vocab_size)
        cloned_embeddings = model_embed.weight.clone()

        # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size + 10)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size - 15)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)

        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        # Input ids should be clamped to the maximum size of the vocabulary.
        # `clamp_` works in place on the shared `inputs_dict`, so later model classes see the clamped ids too.
        inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)

        # make sure that decoder_input_ids are resized as well
        if "decoder_input_ids" in inputs_dict:
            inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that adding and removing tokens has not modified the first part of the embedding matrix.
        # `zip` stops at the shorter (shrunk) matrix, so only the rows present in both are compared.
        models_equal = True
        for p1, p2 in zip(cloned_embeddings, model_embed.weight):
            if p1.data.ne(p2.data).sum() > 0:
                models_equal = False

        self.assertTrue(models_equal)

        # Start again from a fresh model for the `pad_to_multiple_of` checks.
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        model_vocab_size = config.text_config.vocab_size
        model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
        # NOTE: the checks below used `assertTrue(a, b)`, where `b` is only the failure message, so they
        # could never fail. They are now real comparisons.
        # NOTE(review): they assume `resize_token_embeddings` keeps `config.text_config.vocab_size` and
        # `model.vocab_size` equal to the (padded) matrix size -- confirm against the model implementation.
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)

        model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        self.assertEqual(model_embed.weight.shape[0], model.config.text_config.vocab_size)
        self.assertEqual(model.config.text_config.vocab_size, model.vocab_size)

        model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
        target_dimension = 128
        model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0], target_dimension)

        with self.assertRaisesRegex(
            ValueError,
            "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
        ):
            model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
# (same checks, restructured as a loop, so this is no longer a verbatim copy).
def test_resize_embeddings_untied(self):
    """Check that resizing token embeddings also resizes the output embeddings when weights are not tied.

    For each class in `self.all_model_classes` that exposes output embeddings, the vocabulary is first
    grown by 10 and then shrunk by 15 relative to the configured size. After each resize the test checks
    `config.text_config.vocab_size`, the first dimension of the output embedding weight (and bias, if
    any), and that a forward pass still runs.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy or when
    `tie_word_embeddings` still reads as truthy after being set to `False`.
    """
    original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    original_config.tie_word_embeddings = False

    # if model cannot untied embeddings -> leave test
    if original_config.tie_word_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config).to(torch_device)

        # if no output embeddings -> leave test
        if model.get_output_embeddings() is None:
            continue

        base_vocab_size = config.text_config.vocab_size

        # First grow the vocabulary, then shrink it below its configured size.
        for delta in (10, -15):
            resized_vocab_size = base_vocab_size + delta
            model.resize_token_embeddings(resized_vocab_size)
            self.assertEqual(model.config.text_config.vocab_size, resized_vocab_size)

            # Check that it actually resizes the output embeddings matrix
            out_embed = model.get_output_embeddings()
            self.assertEqual(out_embed.weight.shape[0], resized_vocab_size)
            # Check bias if present
            if out_embed.bias is not None:
                self.assertEqual(out_embed.bias.shape[0], resized_vocab_size)

            if delta < 0:
                # Input ids should be clamped to the maximum size of the vocabulary.
                # `clamp_` mutates the shared `inputs_dict` in place.
                highest_token_id = resized_vocab_size - 1
                inputs_dict["input_ids"].clamp_(max=highest_token_id)
                if "decoder_input_ids" in inputs_dict:
                    inputs_dict["decoder_input_ids"].clamp_(max=highest_token_id)

            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
# (dead code removed, so this is no longer a verbatim copy).
def test_tie_model_weights(self):
    """Check that resizing the token embeddings does not change the number of model parameters.

    For each class in `self.all_model_classes` that exposes output embeddings, a model is built with
    `torchscript=False`, its embeddings are grown by 10 tokens, and the length of
    `list(model.parameters())` is compared before and after.

    Only the parameter count is compared; the values of the input and output embeddings are not.

    Returns early (test passes) when `self.test_torchscript` is falsy.
    """
    if not self.test_torchscript:
        return

    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        # The `torchscript=True` model is only used to find out whether the class has output embeddings.
        # NOTE(review): presumably `torchscript=True` yields untied weights (hence the name) -- confirm.
        config.torchscript = True
        model_not_tied = model_class(config)
        if model_not_tied.get_output_embeddings() is None:
            continue

        config_tied = copy.deepcopy(config)
        config_tied.torchscript = False
        model_tied = model_class(config_tied)
        params_tied = list(model_tied.parameters())

        # Check that after resize they remain tied.
        model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
        params_tied_2 = list(model_tied.parameters())
        self.assertEqual(len(params_tied_2), len(params_tied))
|
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
|
class LlavaNextForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||||
|
|||||||
@@ -14,7 +14,6 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
""" Testing suite for the PyTorch VipLlava model. """
|
""" Testing suite for the PyTorch VipLlava model. """
|
||||||
|
|
||||||
import copy
|
|
||||||
import gc
|
import gc
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
@@ -186,171 +185,6 @@ class VipLlavaForConditionalGenerationModelTest(ModelTesterMixin, unittest.TestC
|
|||||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_tokens_embeddings with config.vocab_size->config.text_config.vocab_size.
# The `pad_to_multiple_of` assertions are stricter than in the common test, so this is no longer a verbatim copy.
def test_resize_tokens_embeddings(self):
    """Check that `resize_token_embeddings` grows, shrinks and pads the token embedding matrix.

    For every class in `self.all_model_classes` the test verifies that:

    - `config.text_config.vocab_size` and the returned embedding matrix follow the requested size,
    - a forward pass still runs after each resize,
    - the rows compared after growing and shrinking are unchanged,
    - with `pad_to_multiple_of=64` the matrix size is a multiple of 64, and a non-integer
      `pad_to_multiple_of` raises `ValueError`.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy.
    """
    (
        original_config,
        inputs_dict,
    ) = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        if self.model_tester.is_training is False:
            model.eval()

        model_vocab_size = config.text_config.vocab_size
        # Retrieve the embeddings and clone them
        model_embed = model.resize_token_embeddings(model_vocab_size)
        cloned_embeddings = model_embed.weight.clone()

        # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size + 10)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
        model_embed = model.resize_token_embeddings(model_vocab_size - 15)
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size - 15)
        # Check that it actually resizes the embeddings matrix
        self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)

        # Check that the model can still do a forward pass successfully (every parameter should be resized)
        # Input ids should be clamped to the maximum size of the vocabulary.
        # `clamp_` works in place on the shared `inputs_dict`, so later model classes see the clamped ids too.
        inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)

        # make sure that decoder_input_ids are resized as well
        if "decoder_input_ids" in inputs_dict:
            inputs_dict["decoder_input_ids"].clamp_(max=model_vocab_size - 15 - 1)
        model(**self._prepare_for_class(inputs_dict, model_class))

        # Check that adding and removing tokens has not modified the first part of the embedding matrix.
        # `zip` stops at the shorter (shrunk) matrix, so only the rows present in both are compared.
        models_equal = True
        for p1, p2 in zip(cloned_embeddings, model_embed.weight):
            if p1.data.ne(p2.data).sum() > 0:
                models_equal = False

        self.assertTrue(models_equal)

        # Start again from a fresh model for the `pad_to_multiple_of` checks.
        config = copy.deepcopy(original_config)
        model = model_class(config)
        model.to(torch_device)

        model_vocab_size = config.text_config.vocab_size
        model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
        # NOTE: the checks below used `assertTrue(a, b)`, where `b` is only the failure message, so they
        # could never fail. They are now real comparisons.
        # NOTE(review): they assume `resize_token_embeddings` keeps `config.text_config.vocab_size` and
        # `model.vocab_size` equal to the (padded) matrix size -- confirm against the model implementation.
        self.assertEqual(model.config.text_config.vocab_size, model_vocab_size + 10)

        model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        self.assertEqual(model_embed.weight.shape[0], model.config.text_config.vocab_size)
        self.assertEqual(model.config.text_config.vocab_size, model.vocab_size)

        model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0] % 64, 0)

        # Check that resizing a model to a multiple of pad_to_multiple leads to a model of exactly that size
        target_dimension = 128
        model_embed = model.resize_token_embeddings(target_dimension, pad_to_multiple_of=64)
        self.assertEqual(model_embed.weight.shape[0], target_dimension)

        with self.assertRaisesRegex(
            ValueError,
            "Asking to pad the embedding matrix to a multiple of `1.3`, which is not and integer. Please make sure to pass an integer",
        ):
            model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=1.3)
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_resize_embeddings_untied with config.vocab_size->config.text_config.vocab_size
# (same checks, restructured as a loop, so this is no longer a verbatim copy).
def test_resize_embeddings_untied(self):
    """Check that resizing token embeddings also resizes the output embeddings when weights are not tied.

    For each class in `self.all_model_classes` that exposes output embeddings, the vocabulary is first
    grown by 10 and then shrunk by 15 relative to the configured size. After each resize the test checks
    `config.text_config.vocab_size`, the first dimension of the output embedding weight (and bias, if
    any), and that a forward pass still runs.

    Returns early (test passes) when `self.test_resize_embeddings` is falsy or when
    `tie_word_embeddings` still reads as truthy after being set to `False`.
    """
    original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    if not self.test_resize_embeddings:
        return

    original_config.tie_word_embeddings = False

    # if model cannot untied embeddings -> leave test
    if original_config.tie_word_embeddings:
        return

    for model_class in self.all_model_classes:
        config = copy.deepcopy(original_config)
        model = model_class(config).to(torch_device)

        # if no output embeddings -> leave test
        if model.get_output_embeddings() is None:
            continue

        base_vocab_size = config.text_config.vocab_size

        # First grow the vocabulary, then shrink it below its configured size.
        for delta in (10, -15):
            resized_vocab_size = base_vocab_size + delta
            model.resize_token_embeddings(resized_vocab_size)
            self.assertEqual(model.config.text_config.vocab_size, resized_vocab_size)

            # Check that it actually resizes the output embeddings matrix
            out_embed = model.get_output_embeddings()
            self.assertEqual(out_embed.weight.shape[0], resized_vocab_size)
            # Check bias if present
            if out_embed.bias is not None:
                self.assertEqual(out_embed.bias.shape[0], resized_vocab_size)

            if delta < 0:
                # Input ids should be clamped to the maximum size of the vocabulary.
                # `clamp_` mutates the shared `inputs_dict` in place.
                highest_token_id = resized_vocab_size - 1
                inputs_dict["input_ids"].clamp_(max=highest_token_id)
                if "decoder_input_ids" in inputs_dict:
                    inputs_dict["decoder_input_ids"].clamp_(max=highest_token_id)

            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            model(**self._prepare_for_class(inputs_dict, model_class))
|
|
||||||
|
|
||||||
# Adapted from tests.test_modeling_common.ModelTesterMixin.test_tie_model_weights with config.vocab_size->config.text_config.vocab_size
# (dead code removed, so this is no longer a verbatim copy).
def test_tie_model_weights(self):
    """Check that resizing the token embeddings does not change the number of model parameters.

    For each class in `self.all_model_classes` that exposes output embeddings, a model is built with
    `torchscript=False`, its embeddings are grown by 10 tokens, and the length of
    `list(model.parameters())` is compared before and after.

    Only the parameter count is compared; the values of the input and output embeddings are not.

    Returns early (test passes) when `self.test_torchscript` is falsy.
    """
    if not self.test_torchscript:
        return

    config, _ = self.model_tester.prepare_config_and_inputs_for_common()

    for model_class in self.all_model_classes:
        # The `torchscript=True` model is only used to find out whether the class has output embeddings.
        # NOTE(review): presumably `torchscript=True` yields untied weights (hence the name) -- confirm.
        config.torchscript = True
        model_not_tied = model_class(config)
        if model_not_tied.get_output_embeddings() is None:
            continue

        config_tied = copy.deepcopy(config)
        config_tied.torchscript = False
        model_tied = model_class(config_tied)
        params_tied = list(model_tied.parameters())

        # Check that after resize they remain tied.
        model_tied.resize_token_embeddings(config.text_config.vocab_size + 10)
        params_tied_2 = list(model_tied.parameters())
        self.assertEqual(len(params_tied_2), len(params_tied))
|
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
class VipLlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
class VipLlavaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||||
|
|||||||
@@ -1762,14 +1762,19 @@ class ModelTesterMixin:
|
|||||||
if self.model_tester.is_training is False:
|
if self.model_tester.is_training is False:
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|
||||||
model_vocab_size = config.vocab_size
|
model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
|
||||||
# Retrieve the embeddings and clone theme
|
# Retrieve the embeddings and clone theme
|
||||||
model_embed = model.resize_token_embeddings(model_vocab_size)
|
model_embed = model.resize_token_embeddings(model_vocab_size)
|
||||||
cloned_embeddings = model_embed.weight.clone()
|
cloned_embeddings = model_embed.weight.clone()
|
||||||
|
|
||||||
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
||||||
model_embed = model.resize_token_embeddings(model_vocab_size + 10)
|
model_embed = model.resize_token_embeddings(model_vocab_size + 10)
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
|
self.assertEqual(new_model_vocab_size, model_vocab_size + 10)
|
||||||
# Check that it actually resizes the embeddings matrix
|
# Check that it actually resizes the embeddings matrix
|
||||||
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
|
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
|
||||||
# Check that the model can still do a forward pass successfully (every parameter should be resized)
|
# Check that the model can still do a forward pass successfully (every parameter should be resized)
|
||||||
@@ -1777,7 +1782,12 @@ class ModelTesterMixin:
|
|||||||
|
|
||||||
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
||||||
model_embed = model.resize_token_embeddings(model_vocab_size - 15)
|
model_embed = model.resize_token_embeddings(model_vocab_size - 15)
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
|
self.assertEqual(new_model_vocab_size, model_vocab_size - 15)
|
||||||
# Check that it actually resizes the embeddings matrix
|
# Check that it actually resizes the embeddings matrix
|
||||||
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
|
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
|
||||||
|
|
||||||
@@ -1802,15 +1812,25 @@ class ModelTesterMixin:
|
|||||||
model = model_class(config)
|
model = model_class(config)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
|
|
||||||
model_vocab_size = config.vocab_size
|
model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
|
||||||
model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
|
model.resize_token_embeddings(model_vocab_size + 10, pad_to_multiple_of=1)
|
||||||
self.assertTrue(model.config.vocab_size + 10, model_vocab_size)
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
|
self.assertTrue(new_model_vocab_size + 10, model_vocab_size)
|
||||||
|
|
||||||
model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
|
model_embed = model.resize_token_embeddings(model_vocab_size, pad_to_multiple_of=64)
|
||||||
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
|
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
|
||||||
|
|
||||||
self.assertTrue(model_embed.weight.shape[0], model.config.vocab_size)
|
self.assertTrue(model_embed.weight.shape[0], new_model_vocab_size)
|
||||||
self.assertTrue(model.config.vocab_size, model.vocab_size)
|
self.assertTrue(new_model_vocab_size, model.vocab_size)
|
||||||
|
|
||||||
model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
|
model_embed = model.resize_token_embeddings(model_vocab_size + 13, pad_to_multiple_of=64)
|
||||||
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
|
self.assertTrue(model_embed.weight.shape[0] // 64, 0)
|
||||||
@@ -1849,9 +1869,14 @@ class ModelTesterMixin:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
|
||||||
model_vocab_size = config.vocab_size
|
model_vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
|
||||||
model.resize_token_embeddings(model_vocab_size + 10)
|
model.resize_token_embeddings(model_vocab_size + 10)
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
|
self.assertEqual(new_model_vocab_size, model_vocab_size + 10)
|
||||||
output_embeds = model.get_output_embeddings()
|
output_embeds = model.get_output_embeddings()
|
||||||
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
|
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size + 10)
|
||||||
# Check bias if present
|
# Check bias if present
|
||||||
@@ -1862,7 +1887,12 @@ class ModelTesterMixin:
|
|||||||
|
|
||||||
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
||||||
model.resize_token_embeddings(model_vocab_size - 15)
|
model.resize_token_embeddings(model_vocab_size - 15)
|
||||||
self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
|
new_model_vocab_size = (
|
||||||
|
model.config.text_config.vocab_size
|
||||||
|
if hasattr(model.config, "text_config")
|
||||||
|
else model.config.vocab_size
|
||||||
|
)
|
||||||
|
self.assertEqual(new_model_vocab_size, model_vocab_size - 15)
|
||||||
# Check that it actually resizes the embeddings matrix
|
# Check that it actually resizes the embeddings matrix
|
||||||
output_embeds = model.get_output_embeddings()
|
output_embeds = model.get_output_embeddings()
|
||||||
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
|
self.assertEqual(output_embeds.weight.shape[0], model_vocab_size - 15)
|
||||||
@@ -1949,7 +1979,8 @@ class ModelTesterMixin:
|
|||||||
# self.assertTrue(check_same_values(embeddings, decoding))
|
# self.assertTrue(check_same_values(embeddings, decoding))
|
||||||
|
|
||||||
# Check that after resize they remain tied.
|
# Check that after resize they remain tied.
|
||||||
model_tied.resize_token_embeddings(config.vocab_size + 10)
|
vocab_size = config.text_config.vocab_size if hasattr(config, "text_config") else config.vocab_size
|
||||||
|
model_tied.resize_token_embeddings(vocab_size + 10)
|
||||||
params_tied_2 = list(model_tied.parameters())
|
params_tied_2 = list(model_tied.parameters())
|
||||||
self.assertEqual(len(params_tied_2), len(params_tied))
|
self.assertEqual(len(params_tied_2), len(params_tied))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user