[generate] beam search -- fix output cropping (#37080)
* handle jagged beams
* better comment
* bart -- beam search tests print special tokens
* more bart test updates
* more tests!
* better comment
This commit is contained in:
@@ -415,16 +415,20 @@ class M2M100ModelIntegrationTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
expected_en = [
|
||||
"The NSA case highlights the total absence of intelligence debate",
|
||||
"I think there are two levels of response from the French government.",
|
||||
"</s> __en__ "
|
||||
"The NSA case highlights the total absence of intelligence debate"
|
||||
"</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>",
|
||||
"</s> __en__ "
|
||||
"I think there are two levels of response from the French government."
|
||||
"</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>",
|
||||
"</s> __en__ "
|
||||
"When François Hollande calls Barack Obama or when Foreign Minister Laurent Fabius calls the U.S."
|
||||
" Ambassador, they respond to a real discovery, which is that of the scale of U.S. surveillance on all"
|
||||
" communications in France.",
|
||||
" communications in France."
|
||||
"</s>",
|
||||
]
|
||||
|
||||
generated = tokenizer.batch_decode(
|
||||
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
|
||||
)
|
||||
generated = tokenizer.batch_decode(hypotheses_batch)
|
||||
assert generated == expected_en
|
||||
|
||||
@require_flash_attn
|
||||
|
||||
Reference in New Issue
Block a user