Tokenizer kwargs in textgeneration pipe (#28362)
* added args to the pipeline * added test * more sensical tests * fixup * docs * typo ; * docs * made changes to support named args * fixed test * docs update * styles * docs * docs
This commit is contained in:
@@ -90,6 +90,22 @@ class TextGenerationPipelineTests(unittest.TestCase):
|
||||
{"generated_token_ids": ANY(list)},
|
||||
],
|
||||
)
|
||||
|
||||
## -- test tokenizer_kwargs
|
||||
test_str = "testing tokenizer kwargs. using truncation must result in a different generation."
|
||||
output_str, output_str_with_truncation = (
|
||||
text_generator(test_str, do_sample=False, return_full_text=False)[0]["generated_text"],
|
||||
text_generator(
|
||||
test_str,
|
||||
do_sample=False,
|
||||
return_full_text=False,
|
||||
truncation=True,
|
||||
max_length=3,
|
||||
)[0]["generated_text"],
|
||||
)
|
||||
assert output_str != output_str_with_truncation # results must be different because one call used truncation
|
||||
|
||||
# -- what is the point of this test? padding is hardcoded False in the pipeline anyway
|
||||
text_generator.tokenizer.pad_token_id = text_generator.model.config.eos_token_id
|
||||
text_generator.tokenizer.pad_token = "<pad>"
|
||||
outputs = text_generator(
|
||||
|
||||
Reference in New Issue
Block a user