🚨🚨 TextGenerationPipeline: rely on the tokenizer default kwargs (#31747)
* rely on the tokenizer default kwargs
* fix a few tests
This commit is contained in:
@@ -2087,6 +2087,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
[1, 18],
|
||||
)
|
||||
|
||||
+ # TODO (joao): replace `stop_sequence` in the pipeline by the more recent `generate` functionality
|
||||
def test_stop_sequence_stopping_criteria(self):
|
||||
# PT-only test: TF doesn't have StoppingCriteria
|
||||
prompt = """Hello I believe in"""
|
||||
@@ -2094,17 +2095,11 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
||||
output = generator(prompt)
|
||||
self.assertEqual(
|
||||
output,
|
||||
- [
|
||||
- {
|
||||
- "generated_text": (
|
||||
- "Hello I believe in in in number number number number number number number number number"
|
||||
- )
|
||||
- }
|
||||
- ],
|
||||
+ [{"generated_text": ("Hello I believe in we we we we we we we we we")}],
|
||||
)
|
||||
|
||||
- output = generator(prompt, stop_sequence=" number")
|
||||
- self.assertEqual(output, [{"generated_text": "Hello I believe in in in number"}])
|
||||
+ output = generator(prompt, stop_sequence=" we")
|
||||
+ self.assertEqual(output, [{"generated_text": "Hello I believe in we"}])
|
||||
|
||||
def test_generate_non_nlp_input_ids_as_kwarg(self):
|
||||
# PT-only test: AFAIK there's no non-NLP model architecture in TF that supports `input_ids` as its only input
|
||||
|
||||
@@ -398,7 +398,7 @@ class TextGenerationPipelineTests(unittest.TestCase):
|
||||
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
|
||||
else:
|
||||
with self.assertRaises((ValueError, AssertionError)):
|
||||
- outputs = text_generator("")
|
||||
+ outputs = text_generator("", add_special_tokens=False)
|
||||
|
||||
if text_generator.framework == "tf":
|
||||
# TF generation does not support max_new_tokens, and it's impossible
|
||||
|
||||
Reference in New Issue
Block a user