RFC: Replace custom TF embeddings by Keras embeddings (#18939)

This commit is contained in:
Joao Gante
2022-09-10 11:34:49 +01:00
committed by GitHub
parent 855dcae8bb
commit 00cbadb870
5 changed files with 141 additions and 142 deletions

View File

@@ -230,69 +230,6 @@ class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestC
name = model.get_bias()
assert name is None
def test_resize_token_embeddings(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
# Here we build the word embeddings weights if not exists.
# And then we retry to get the attribute once built.
model(model.dummy_inputs)
if hasattr(embedding_layer, "weight"):
return embedding_layer.weight
else:
return None
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
# build the embeddings
model = model_class(config=config)
old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
old_final_logits_bias = model.get_bias()
# reshape the embeddings
model.resize_token_embeddings(size)
new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
new_final_logits_bias = model.get_bias()
# check that the resized embeddings size matches the desired size.
assert_size = size if size is not None else config.vocab_size
self.assertEqual(new_input_embeddings.shape[0], assert_size)
# check that weights remain the same after resizing
models_equal = True
for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_output_embeddings is not None and new_output_embeddings is not None:
self.assertEqual(new_output_embeddings.shape[0], assert_size)
models_equal = True
for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
if old_final_logits_bias is not None and new_final_logits_bias is not None:
old_final_logits_bias = old_final_logits_bias["final_logits_bias"]
new_final_logits_bias = new_final_logits_bias["final_logits_bias"]
self.assertEqual(new_final_logits_bias.shape[0], 1)
self.assertEqual(new_final_logits_bias.shape[1], assert_size)
models_equal = True
for old, new in zip(old_final_logits_bias.value(), new_final_logits_bias.value()):
for p1, p2 in zip(old, new):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)
@tooslow
def test_saved_model_creation(self):
pass
@@ -635,7 +572,7 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase):
def test_xsum_1_1_generation(self):
model = self.xsum_1_1_model
assert model.model.decoder.embed_tokens._layer == model.model.shared
assert model.model.decoder.embed_tokens == model.model.shared
ARTICLE = (
"The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
" Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"
@@ -685,7 +622,7 @@ class FasterTFBartModelIntegrationTests(unittest.TestCase):
def test_xsum_1_1_xla_generation(self):
# same test as above, but with `no_repeat_ngram_size=0` (not compatible with XLA) and XLA comparison enabled
model = self.xsum_1_1_model
assert model.model.decoder.embed_tokens._layer == model.model.shared
assert model.model.decoder.embed_tokens == model.model.shared
ARTICLE = (
"The Palestinian Authority officially became the 123rd member of the International Criminal Court on"
" Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The"

View File

@@ -1144,30 +1144,20 @@ class TFModelTesterMixin:
self.assert_outputs_same(output_for_dict_input, output_for_kw_input)
def test_resize_token_embeddings(self):
# TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on
# tf.keras.layers.Embedding
if not self.test_resize_embeddings:
return
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _get_word_embedding_weight(model, embedding_layer):
embeds = getattr(embedding_layer, "weight", None)
if embeds is not None:
return embeds
embeds = getattr(embedding_layer, "decoder", None)
if embeds is not None:
return embeds
model(model.dummy_inputs)
embeds = getattr(embedding_layer, "weight", None)
if embeds is not None:
return embeds
embeds = getattr(embedding_layer, "decoder", None)
if embeds is not None:
return embeds
return None
if isinstance(embedding_layer, tf.keras.layers.Embedding):
# builds the embeddings layer
model(model.dummy_inputs)
return embedding_layer.embeddings
else:
return model._get_word_embedding_weight(embedding_layer)
for model_class in self.all_model_classes:
for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
@@ -1195,10 +1185,10 @@ class TFModelTesterMixin:
if old_bias is not None and new_bias is not None:
for old_weight, new_weight in zip(old_bias.values(), new_bias.values()):
self.assertEqual(new_weight.shape[0], assert_size)
self.assertEqual(new_weight.shape[-1], assert_size)
models_equal = True
for p1, p2 in zip(old_weight.value(), new_weight.value()):
for p1, p2 in zip(tf.squeeze(old_weight), tf.squeeze(new_weight)):
if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
models_equal = False
self.assertTrue(models_equal)