Tokenizer kwargs in textgeneration pipe (#28362)

* added args to the pipeline * added test * more sensible tests * fixup * docs * typo ; * docs * made changes to support named args * fixed test * docs update * styles * docs * docs
2024-01-15 07:52:18 -08:00
parent a573ac74fd
commit 366c03271e
3 changed files with 49 additions and 3 deletions
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -90,6 +90,22 @@ class TextGenerationPipelineTests(unittest.TestCase):
                {"generated_token_ids": ANY(list)},
            ],
        )
+
+        ## -- test tokenizer_kwargs
+        test_str = "testing tokenizer kwargs. using truncation must result in a different generation."
+        output_str, output_str_with_truncation = (
+            text_generator(test_str, do_sample=False, return_full_text=False)[0]["generated_text"],
+            text_generator(
+                test_str,
+                do_sample=False,
+                return_full_text=False,
+                truncation=True,
+                max_length=3,
+            )[0]["generated_text"],
+        )
+        assert output_str != output_str_with_truncation  # results must be different because one had truncation
+
+        # -- what is the point of this test? padding is hardcoded False in the pipeline anyway
        text_generator.tokenizer.pad_token_id = text_generator.model.config.eos_token_id
        text_generator.tokenizer.pad_token = "<pad>"
        outputs = text_generator(