Add tokenizer kwargs to the fill-mask pipeline. (#26234)

* Add tokenizer kwarg inputs

* Adding tokenizer_kwargs to _sanitize_parameters

* Add truncation=True example to tests

* Update test_pipelines_fill_mask.py

* Update test_pipelines_fill_mask.py

* make fix-copies and make style

* Update fill_mask.py

Replace single tick with double

* make fix-copies

* Style

---------

Co-authored-by: Lysandre <lysandre@huggingface.co>
This commit is contained in:
Nathan Cahill
2023-10-03 01:25:10 -07:00
committed by GitHub
parent df6a855e7b
commit b5ca8fcd20
4 changed files with 50 additions and 7 deletions

View File

@@ -211,6 +211,18 @@ class FillMaskPipelineTests(unittest.TestCase):
],
)
outputs = unmasker(
"My name is <mask>" + "Lorem ipsum dolor sit amet, consectetur adipiscing elit," * 100,
tokenizer_kwargs={"truncation": True},
)
self.assertEqual(
nested_simplify(outputs, decimals=6),
[
{"sequence": "My name is grouped", "score": 2.2e-05, "token": 38015, "token_str": " grouped"},
{"sequence": "My name is accuser", "score": 2.1e-05, "token": 25506, "token_str": " accuser"},
],
)
@require_torch
def test_model_no_pad_pt(self):
unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="pt")