🚨🚨 TextGenerationPipeline: rely on the tokenizer default kwargs (#31747)

* rely on the tokenizer default kwargs

* fix a few tests
This commit is contained in:
Joao Gante
2024-07-02 15:17:42 +01:00
committed by GitHub
parent a9701953ff
commit 82486e5995
3 changed files with 20 additions and 23 deletions

View File

@@ -2087,6 +2087,7 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
[1, 18],
)
# TODO (joao): replace `stop_sequence` in the pipeline by the more recent `generate` functionality
def test_stop_sequence_stopping_criteria(self):
# PT-only test: TF doesn't have StoppingCriteria
prompt = """Hello I believe in"""
@@ -2094,17 +2095,11 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
output = generator(prompt)
self.assertEqual(
output,
[
{
"generated_text": (
"Hello I believe in in in number number number number number number number number number"
)
}
],
[{"generated_text": ("Hello I believe in we we we we we we we we we")}],
)
output = generator(prompt, stop_sequence=" number")
self.assertEqual(output, [{"generated_text": "Hello I believe in in in number"}])
output = generator(prompt, stop_sequence=" we")
self.assertEqual(output, [{"generated_text": "Hello I believe in we"}])
def test_generate_non_nlp_input_ids_as_kwarg(self):
# PT-only test: AFAIK there's no non-NLP model architecture in TF that supports `input_ids` as its only input