[BugFix] QA pipeline edge case: align_to_words=True in QuestionAnsweringPipeline can lead to duplicate answers (#38761)
* fixing the problem of align_to_words=True leading to duplicate answers * adding tests * some fixes * some fixes * changing the handle_duplicate_answers=False by default * some fixes * some fixes * make the duplicate handling the default behaviour and merge duplicates * make the duplicate handling the default behaviour
This commit is contained in:
@@ -138,7 +138,11 @@ class QAPipelineTests(unittest.TestCase):
|
||||
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris.", top_k=20
|
||||
)
|
||||
self.assertEqual(
|
||||
outputs, [{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)} for i in range(20)]
|
||||
outputs,
|
||||
[
|
||||
{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)}
|
||||
for i in range(len(outputs))
|
||||
],
|
||||
)
|
||||
for single_output in outputs:
|
||||
compare_pipeline_output_to_hub_spec(single_output, QuestionAnsweringOutputElement)
|
||||
@@ -279,6 +283,19 @@ class QAPipelineTests(unittest.TestCase):
|
||||
)
|
||||
self.assertEqual(nested_simplify(outputs), {"score": 0.988, "start": 0, "end": 0, "answer": ""})
|
||||
|
||||
@require_torch
def test_duplicate_handling(self):
    """Check that a top-k question-answering call returns no repeated answer text.

    Builds a "question-answering" pipeline with the
    ``deepset/tinyroberta-squad2`` checkpoint, requests up to ten candidates
    for a single question/context pair, and asserts that every returned
    ``"answer"`` string is distinct.
    """
    qa = pipeline("question-answering", model="deepset/tinyroberta-squad2")

    predictions = qa(
        question="Who is the chancellor of Germany?",
        context="Angela Merkel was the chancellor of Germany.",
        top_k=10,
    )

    # Collect only the answer text; start/end/score are not part of the check.
    answer_texts = []
    for prediction in predictions:
        answer_texts.append(prediction["answer"])

    # A set drops repeats, so equal sizes mean every answer string is unique.
    distinct_texts = set(answer_texts)
    self.assertEqual(
        len(answer_texts),
        len(distinct_texts),
        "There are duplicate answers in the outputs.",
    )
|
||||
|
||||
@require_tf
|
||||
def test_small_model_tf(self):
|
||||
question_answerer = pipeline(
|
||||
|
||||
Reference in New Issue
Block a user