XGLM: Fix left-padding (PT and TF) (#22828)
This commit is contained in:
@@ -175,44 +175,6 @@ class TFXGLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
name = model.get_bias()
|
||||
assert name is None
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
model = TFXGLMForCausalLM.from_pretrained("facebook/xglm-564M")
|
||||
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
# use different length sentences to test batching
|
||||
sentences = [
|
||||
"Hello, my dog is a little",
|
||||
"Today, I",
|
||||
]
|
||||
|
||||
inputs = tokenizer(sentences, return_tensors="tf", padding=True)
|
||||
|
||||
outputs = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
|
||||
|
||||
inputs_non_padded = tokenizer(sentences[0], return_tensors="tf").input_ids
|
||||
output_non_padded = model.generate(input_ids=inputs_non_padded)
|
||||
|
||||
num_paddings = (
|
||||
inputs_non_padded.shape[-1]
|
||||
- tf.math.reduce_sum(tf.cast(inputs["attention_mask"][-1], dtype=tf.int64)).numpy()
|
||||
)
|
||||
inputs_padded = tokenizer(sentences[1], return_tensors="tf").input_ids
|
||||
output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)
|
||||
|
||||
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
|
||||
padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
|
||||
|
||||
expected_output_sentence = [
|
||||
"Hello, my dog is a little bit of a shy one, but he is very friendly",
|
||||
"Today, I am going to share with you a few of my favorite things",
|
||||
]
|
||||
self.assertListEqual(expected_output_sentence, batch_out_sentence)
|
||||
self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in TF_XGLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
@@ -246,7 +208,9 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
tf.random.set_seed(0)
|
||||
tokenized = tokenizer("Today is a nice day and", return_tensors="tf")
|
||||
input_ids = tokenized.input_ids
|
||||
output_ids = model.generate(input_ids, do_sample=True, seed=[7, 0])
|
||||
# forces the generation to happen on CPU, to avoid GPU-related quirks (and assure same output regardless of the available devices)
|
||||
with tf.device(":/CPU:0"):
|
||||
output_ids = model.generate(input_ids, do_sample=True, seed=[7, 0])
|
||||
output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
||||
|
||||
EXPECTED_OUTPUT_STR = (
|
||||
@@ -255,33 +219,41 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
|
||||
|
||||
@slow
|
||||
def test_lm_generate_xglm_left_padding(self):
|
||||
"""Tests that the generated text is the same, regarless of left padding"""
|
||||
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
|
||||
def test_batch_generation(self):
|
||||
model = TFXGLMForCausalLM.from_pretrained("facebook/xglm-564M")
|
||||
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
generation_kwargs = {
|
||||
"bad_words_ids": [tokenizer("is").input_ids, tokenizer("angry about").input_ids],
|
||||
"no_repeat_ngram_size": 2,
|
||||
"do_sample": False,
|
||||
"repetition_penalty": 1.3,
|
||||
}
|
||||
expected_output_string = (
|
||||
"Today is a beautiful day and I am so glad that we have the opportunity to spend time with"
|
||||
)
|
||||
# use different length sentences to test batching
|
||||
sentences = [
|
||||
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
|
||||
"left-padding, such as in batched generation. The output for the sequence below should be the same "
|
||||
"regardless of whether left padding is applied or not. When",
|
||||
"Hello, my dog is a little",
|
||||
]
|
||||
|
||||
sentences = ["Today is a beautiful day and"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True)
|
||||
# using default length
|
||||
output_ids = model.generate(**input_ids, **generation_kwargs)
|
||||
output_strings = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
||||
self.assertEqual(output_strings[0], expected_output_string)
|
||||
inputs = tokenizer(sentences, return_tensors="tf", padding=True)
|
||||
input_ids = inputs["input_ids"]
|
||||
|
||||
sentences = ["Today is a beautiful day and", "This is a very long input that we absolutely don't care about"]
|
||||
input_ids = tokenizer(sentences, return_tensors="tf", padding=True)
|
||||
# longer max length to capture the full length (remember: it is left padded)
|
||||
output_ids = model.generate(**input_ids, **generation_kwargs, max_length=28)
|
||||
output_strings = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
||||
self.assertEqual(output_strings[0], expected_output_string)
|
||||
outputs = model.generate(input_ids=input_ids, attention_mask=inputs["attention_mask"], max_new_tokens=12)
|
||||
|
||||
inputs_non_padded = tokenizer(sentences[0], return_tensors="tf").input_ids
|
||||
output_non_padded = model.generate(input_ids=inputs_non_padded, max_new_tokens=12)
|
||||
|
||||
inputs_padded = tokenizer(sentences[1], return_tensors="tf").input_ids
|
||||
output_padded = model.generate(input_ids=inputs_padded, max_new_tokens=12)
|
||||
|
||||
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
|
||||
padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
|
||||
|
||||
expected_output_sentence = [
|
||||
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
|
||||
"left-padding, such as in batched generation. The output for the sequence below should be the same "
|
||||
"regardless of whether left padding is applied or not. When left padding is applied, the sequence will be "
|
||||
"a single",
|
||||
"Hello, my dog is a little bit of a shy one, but he is very friendly",
|
||||
]
|
||||
self.assertListEqual(expected_output_sentence, batch_out_sentence)
|
||||
self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
|
||||
|
||||
@@ -340,46 +340,6 @@ class XGLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_xglm_weight_initialization(*config_and_inputs)
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
|
||||
model.to(torch_device)
|
||||
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
# use different length sentences to test batching
|
||||
sentences = [
|
||||
"Hello, my dog is a little",
|
||||
"Today, I",
|
||||
]
|
||||
|
||||
inputs = tokenizer(sentences, return_tensors="pt", padding=True)
|
||||
input_ids = inputs["input_ids"].to(torch_device)
|
||||
|
||||
outputs = model.generate(
|
||||
input_ids=input_ids,
|
||||
attention_mask=inputs["attention_mask"].to(torch_device),
|
||||
)
|
||||
|
||||
inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
|
||||
output_non_padded = model.generate(input_ids=inputs_non_padded)
|
||||
|
||||
num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
|
||||
inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
|
||||
output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)
|
||||
|
||||
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
|
||||
padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
|
||||
|
||||
expected_output_sentence = [
|
||||
"Hello, my dog is a little bit of a shy one, but he is very friendly",
|
||||
"Today, I am going to share with you a few of my favorite things",
|
||||
]
|
||||
self.assertListEqual(expected_output_sentence, batch_out_sentence)
|
||||
self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in XGLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
@@ -409,6 +369,49 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
|
||||
if verify_outputs:
|
||||
self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
|
||||
model.to(torch_device)
|
||||
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
|
||||
|
||||
tokenizer.padding_side = "left"
|
||||
|
||||
# use different length sentences to test batching
|
||||
sentences = [
|
||||
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
|
||||
"left-padding, such as in batched generation. The output for the sequence below should be the same "
|
||||
"regardless of whether left padding is applied or not. When",
|
||||
"Hello, my dog is a little",
|
||||
]
|
||||
|
||||
inputs = tokenizer(sentences, return_tensors="pt", padding=True)
|
||||
input_ids = inputs["input_ids"].to(torch_device)
|
||||
|
||||
outputs = model.generate(
|
||||
input_ids=input_ids, attention_mask=inputs["attention_mask"].to(torch_device), max_new_tokens=12
|
||||
)
|
||||
|
||||
inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
|
||||
output_non_padded = model.generate(input_ids=inputs_non_padded, max_new_tokens=12)
|
||||
|
||||
inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
|
||||
output_padded = model.generate(input_ids=inputs_padded, max_new_tokens=12)
|
||||
|
||||
batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
||||
non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
|
||||
padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)
|
||||
|
||||
expected_output_sentence = [
|
||||
"This is an extremelly long sentence that only exists to test the ability of the model to cope with "
|
||||
"left-padding, such as in batched generation. The output for the sequence below should be the same "
|
||||
"regardless of whether left padding is applied or not. When left padding is applied, the sequence will be "
|
||||
"a single",
|
||||
"Hello, my dog is a little bit of a shy one, but he is very friendly",
|
||||
]
|
||||
self.assertListEqual(expected_output_sentence, batch_out_sentence)
|
||||
self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
|
||||
|
||||
@slow
|
||||
def test_lm_generate_xglm(self):
|
||||
self._test_lm_generate_xglm_helper()
|
||||
|
||||
Reference in New Issue
Block a user