Tokenizer kwargs in textgeneration pipe (#28362)

* added args to the pipeline

* added test

* more sensical tests

* fixup

* docs

* typo
;

* docs

* made changes to support named args

* fixed test

* docs update

* styles

* docs

* docs
This commit is contained in:
thedamnedrhino
2024-01-15 07:52:18 -08:00
committed by GitHub
parent a573ac74fd
commit 366c03271e
3 changed files with 49 additions and 3 deletions

View File

@@ -90,6 +90,22 @@ class TextGenerationPipelineTests(unittest.TestCase):
{"generated_token_ids": ANY(list)},
],
)
## -- test tokenizer_kwargs
test_str = "testing tokenizer kwargs. using truncation must result in a different generation."
output_str, output_str_with_truncation = (
text_generator(test_str, do_sample=False, return_full_text=False)[0]["generated_text"],
text_generator(
test_str,
do_sample=False,
return_full_text=False,
truncation=True,
max_length=3,
)[0]["generated_text"],
)
assert output_str != output_str_with_truncation # results must be different because one had truncation
# -- what is the point of this test? padding is hardcoded to False in the pipeline anyway
text_generator.tokenizer.pad_token_id = text_generator.model.config.eos_token_id
text_generator.tokenizer.pad_token = "<pad>"
outputs = text_generator(