Adding a new align_to_words param to qa pipeline. (#18010)

* Adding a new `align_to_words` param to qa pipeline.

* Update src/transformers/pipelines/question_answering.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Import protection.

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Nicolas Patry
2022-08-09 18:50:02 +02:00
committed by GitHub
parent ab2006e3d6
commit 9f5fe63548
2 changed files with 61 additions and 10 deletions

View File

@@ -171,6 +171,29 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
self.assertEqual(nested_simplify(outputs), {"score": 0.028, "start": 0, "end": 11, "answer": "HuggingFace"})
@slow
@require_torch
def test_small_model_japanese(self):
    """Exercise the QA pipeline on Japanese text with and without `align_to_words`.

    The first call uses the pipeline defaults; the second passes
    `align_to_words=False`. Both results are compared against pinned values.
    """
    qa_pipeline = pipeline(
        "question-answering",
        model="KoichiYasuoka/deberta-base-japanese-aozora-ud-head",
    )
    question = "国語"
    context = "全学年にわたって小学校の国語の教科書に挿し絵が用いられている"

    # Default call: the returned answer is wrong. This tokenizer does not include a
    # pretokenizer, so the whole text is identified as a single "word" and the
    # answer covers the entire context.
    default_result = qa_pipeline(question=question, context=context)
    expected_default = {
        "score": 1.0,
        "start": 0,
        "end": 30,
        "answer": "全学年にわたって小学校の国語の教科書に挿し絵が用いられている",
    }
    self.assertEqual(nested_simplify(default_result), expected_default)

    # Same query with word alignment disabled.
    unaligned_result = qa_pipeline(question=question, context=context, align_to_words=False)
    expected_unaligned = {"score": 1.0, "start": 15, "end": 18, "answer": "教科書"}
    self.assertEqual(nested_simplify(unaligned_result), expected_unaligned)
@slow
@require_torch
def test_small_model_long_context_cls_slow(self):