Fix XGLM loss computation (PyTorch and TensorFlow) (#35878)

* Fix XGLM loss computation (PyTorch and TensorFlow)

* Update expected output string in XGLM sample test

This updates the expected output string of test_xglm_sample for torch
2.0 to the correct one and removes the one for torch 1.13.1 + cu116
(transformers moved to torch 2.0 with PR #35358).

* Update expected output IDs in XGLM generation test
This commit is contained in:
Damiano Amatruda
2025-02-18 15:37:48 +01:00
committed by GitHub
parent c3ba53303b
commit 4d2de5f63c
4 changed files with 48 additions and 39 deletions

View File

@@ -238,3 +238,22 @@ class TFXGLMModelLanguageGenerationTest(unittest.TestCase):
]
self.assertListEqual(expected_output_sentence, batch_out_sentence)
self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
def test_loss_with_padding(self):
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
model = TFXGLMForCausalLM.from_pretrained("facebook/xglm-564M")
tokenizer.padding_side = "right"
sequence = "Sequence"
tokenized_non_padded = tokenizer(sequence, return_tensors="tf")
labels_non_padded = tokenized_non_padded.input_ids
loss_non_padded = model(tokenized_non_padded, labels=labels_non_padded).loss
tokenized_padded = tokenizer(sequence, padding="max_length", max_length=16, return_tensors="tf")
labels_padded = tokenized_padded.input_ids
labels_padded = tf.where(labels_padded == tokenizer.pad_token_id, -100, labels_padded)
loss_padded = model(tokenized_padded, labels=labels_padded).loss
tf.debugging.assert_near(loss_non_padded, loss_padded, atol=1e-3)

View File

@@ -356,7 +356,7 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
model.to(torch_device)
input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device) # The dog
# </s> The dog is a very friendly dog. He is very affectionate and loves to play with other
expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581] # fmt: skip
expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581, 72616, 5, 984] # fmt: skip
output_ids = model.generate(input_ids, do_sample=False, num_beams=1)
if verify_outputs:
self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
@@ -423,14 +423,11 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
output_ids = model.generate(input_ids, do_sample=True, num_beams=1)
output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)
EXPECTED_OUTPUT_STRS = [
# TODO: remove this once we move to torch 2.0
# torch 1.13.1 + cu116
"Today is a nice day and the sun is shining. A nice day with warm rainy",
# torch 2.0 + cu117
"Today is a nice day and the water is still cold. We just stopped off for some fresh",
]
self.assertIn(output_str, EXPECTED_OUTPUT_STRS)
EXPECTED_OUTPUT_STR = (
"Today is a nice day and the water is still cold. We just stopped off for some fresh coffee. This place"
" looks like a"
)
self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
@require_torch_accelerator
@require_torch_fp16
@@ -451,3 +448,25 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
self.assertFalse(
torch.isnan(outputs.logits[0]).any().item()
) # the first logits could contain NaNs if it fails
def test_loss_with_padding(self):
tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
model.to(torch_device)
tokenizer.padding_side = "right"
sequence = "Sequence"
tokenized_non_padded = tokenizer(sequence, return_tensors="pt")
tokenized_non_padded.to(torch_device)
labels_non_padded = tokenized_non_padded.input_ids.clone()
loss_non_padded = model(**tokenized_non_padded, labels=labels_non_padded).loss
tokenized_padded = tokenizer(sequence, padding="max_length", max_length=16, return_tensors="pt")
tokenized_padded.to(torch_device)
labels_padded = tokenized_padded.input_ids.clone()
labels_padded[labels_padded == tokenizer.pad_token_id] = -100
loss_padded = model(**tokenized_padded, labels=labels_padded).loss
torch.testing.assert_close(loss_non_padded, loss_padded, rtol=1e-3, atol=1e-3)