Moving question_answering tests to the new testing scheme. Had to slightly tweak some ModelTesterConfig for pipelines. (#13277)
* Moving question_answering tests to the new testing scheme. Had to slightly tweak some ModelTesterConfig for pipelines.
* Removing commented code.
This commit is contained in:
@@ -202,7 +202,7 @@ class QuestionAnsweringPipeline(Pipeline):
|
|||||||
- **answer** (:obj:`str`) -- The answer to the question.
|
- **answer** (:obj:`str`) -- The answer to the question.
|
||||||
"""
|
"""
|
||||||
# Set default values
|
# Set default values
|
||||||
kwargs.setdefault("padding", "longest")
|
kwargs.setdefault("padding", "longest" if getattr(self.tokenizer, "pad_token", None) is not None else False)
|
||||||
kwargs.setdefault("topk", 1)
|
kwargs.setdefault("topk", 1)
|
||||||
kwargs.setdefault("doc_stride", 128)
|
kwargs.setdefault("doc_stride", 128)
|
||||||
kwargs.setdefault("max_answer_len", 15)
|
kwargs.setdefault("max_answer_len", 15)
|
||||||
@@ -353,17 +353,17 @@ class QuestionAnsweringPipeline(Pipeline):
|
|||||||
# Start: Index of the first character of the answer in the context string
|
# Start: Index of the first character of the answer in the context string
|
||||||
# End: Index of the character following the last character of the answer in the context string
|
# End: Index of the character following the last character of the answer in the context string
|
||||||
# Answer: Plain text of the answer
|
# Answer: Plain text of the answer
|
||||||
answers += [
|
for s, e, score in zip(starts, ends, scores):
|
||||||
{
|
answers.append(
|
||||||
"score": score.item(),
|
{
|
||||||
"start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
|
"score": score.item(),
|
||||||
"end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
|
"start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
|
||||||
"answer": " ".join(
|
"end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
|
||||||
example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]
|
"answer": " ".join(
|
||||||
),
|
example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]
|
||||||
}
|
),
|
||||||
for s, e, score in zip(starts, ends, scores)
|
}
|
||||||
]
|
)
|
||||||
else:
|
else:
|
||||||
# Convert the answer (tokens) back to the original text
|
# Convert the answer (tokens) back to the original text
|
||||||
# Score: score from the model
|
# Score: score from the model
|
||||||
@@ -376,25 +376,26 @@ class QuestionAnsweringPipeline(Pipeline):
|
|||||||
# Sometimes the max probability token is in the middle of a word so:
|
# Sometimes the max probability token is in the middle of a word so:
|
||||||
# - we start by finding the right word containing the token with `token_to_word`
|
# - we start by finding the right word containing the token with `token_to_word`
|
||||||
# - then we convert this word into a character span with `word_to_chars`
|
# - then we convert this word into a character span with `word_to_chars`
|
||||||
answers += [
|
sequence_index = 1 if question_first else 0
|
||||||
{
|
for s, e, score in zip(starts, ends, scores):
|
||||||
"score": score.item(),
|
try:
|
||||||
"start": enc.word_to_chars(
|
start_word = enc.token_to_word(s)
|
||||||
enc.token_to_word(s), sequence_index=1 if question_first else 0
|
end_word = enc.token_to_word(e)
|
||||||
)[0],
|
start_index = enc.word_to_chars(start_word, sequence_index=sequence_index)[0]
|
||||||
"end": enc.word_to_chars(enc.token_to_word(e), sequence_index=1 if question_first else 0)[
|
end_index = enc.word_to_chars(end_word, sequence_index=sequence_index)[1]
|
||||||
1
|
except Exception:
|
||||||
],
|
# Some tokenizers don't really handle words. Keep to offsets then.
|
||||||
"answer": example.context_text[
|
start_index = enc.offsets[s][0]
|
||||||
enc.word_to_chars(enc.token_to_word(s), sequence_index=1 if question_first else 0)[
|
end_index = enc.offsets[e][1]
|
||||||
0
|
|
||||||
] : enc.word_to_chars(enc.token_to_word(e), sequence_index=1 if question_first else 0)[
|
answers.append(
|
||||||
1
|
{
|
||||||
]
|
"score": score.item(),
|
||||||
],
|
"start": start_index,
|
||||||
}
|
"end": end_index,
|
||||||
for s, e, score in zip(starts, ends, scores)
|
"answer": example.context_text[start_index:end_index],
|
||||||
]
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if kwargs["handle_impossible_answer"]:
|
if kwargs["handle_impossible_answer"]:
|
||||||
answers.append({"score": min_null_score, "start": 0, "end": 0, "answer": ""})
|
answers.append({"score": min_null_score, "start": 0, "end": 0, "answer": ""})
|
||||||
|
|||||||
@@ -147,6 +147,11 @@ class BartModelTester:
|
|||||||
pad_token_id=self.pad_token_id,
|
pad_token_id=self.pad_token_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_pipeline_config(self):
|
||||||
|
config = self.get_config()
|
||||||
|
config.max_position_embeddings = 100
|
||||||
|
return config
|
||||||
|
|
||||||
def prepare_config_and_inputs_for_common(self):
|
def prepare_config_and_inputs_for_common(self):
|
||||||
config, inputs_dict = self.prepare_config_and_inputs()
|
config, inputs_dict = self.prepare_config_and_inputs()
|
||||||
return config, inputs_dict
|
return config, inputs_dict
|
||||||
|
|||||||
@@ -189,6 +189,7 @@ class ReformerModelTester:
|
|||||||
def get_pipeline_config(self):
|
def get_pipeline_config(self):
|
||||||
config = self.get_config()
|
config = self.get_config()
|
||||||
config.vocab_size = 100
|
config.vocab_size = 100
|
||||||
|
config.axial_pos_shape = (4, 25)
|
||||||
config.is_decoder = False
|
config.is_decoder = False
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|||||||
@@ -14,107 +14,126 @@
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import is_tf_available, is_torch_available
|
from transformers import (
|
||||||
|
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||||
|
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
|
||||||
|
LxmertConfig,
|
||||||
|
QuestionAnsweringPipeline,
|
||||||
|
)
|
||||||
from transformers.data.processors.squad import SquadExample
|
from transformers.data.processors.squad import SquadExample
|
||||||
from transformers.pipelines import Pipeline, QuestionAnsweringArgumentHandler, pipeline
|
from transformers.pipelines import QuestionAnsweringArgumentHandler, pipeline
|
||||||
from transformers.testing_utils import slow
|
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow
|
||||||
|
|
||||||
from .test_pipelines_common import CustomInputPipelineCommonMixin
|
from .test_pipelines_common import ANY, PipelineTestCaseMeta
|
||||||
|
|
||||||
|
|
||||||
class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
|
@is_pipeline_test
|
||||||
pipeline_task = "question-answering"
|
class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||||
pipeline_running_kwargs = {
|
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
|
||||||
"padding": "max_length",
|
tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
|
||||||
"max_seq_len": 25,
|
|
||||||
"doc_stride": 5,
|
def run_pipeline_test(self, model, tokenizer, feature_extractor):
|
||||||
} # Default is 'longest' but we use 'max_length' to test equivalence between slow/fast tokenizers
|
if isinstance(model.config, LxmertConfig):
|
||||||
small_models = [
|
# This is a bimodal model, we need to find a more consistent way
|
||||||
"sshleifer/tiny-distilbert-base-cased-distilled-squad"
|
# to switch on those models.
|
||||||
] # Models tested without the @slow decorator
|
return
|
||||||
large_models = [] # Models tested with the @slow decorator
|
question_answerer = QuestionAnsweringPipeline(model, tokenizer)
|
||||||
valid_inputs = [
|
|
||||||
{"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
|
outputs = question_answerer(
|
||||||
{
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
|
||||||
"question": "In what field is HuggingFace working ?",
|
)
|
||||||
"context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
self.assertEqual(outputs, {"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)})
|
||||||
},
|
|
||||||
{
|
outputs = question_answerer(
|
||||||
"question": ["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"],
|
question=["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"],
|
||||||
"context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
context="HuggingFace was founded in Paris.",
|
||||||
},
|
)
|
||||||
{
|
self.assertEqual(
|
||||||
"question": ["In what field is HuggingFace working ?", "In what field is HuggingFace working ?"],
|
outputs,
|
||||||
"context": [
|
[
|
||||||
"HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
|
||||||
"HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
|
||||||
],
|
],
|
||||||
},
|
)
|
||||||
]
|
|
||||||
|
|
||||||
def get_pipelines(self):
|
outputs = question_answerer(
|
||||||
question_answering_pipelines = [
|
question=["What field is HuggingFace working ?", "In what field is HuggingFace ?"],
|
||||||
pipeline(
|
context=[
|
||||||
task=self.pipeline_task,
|
"HuggingFace is a startup based in New-York",
|
||||||
model=model,
|
"HuggingFace is a startup founded in Paris",
|
||||||
tokenizer=model,
|
],
|
||||||
framework="pt" if is_torch_available() else "tf",
|
)
|
||||||
**self.pipeline_loading_kwargs,
|
self.assertEqual(
|
||||||
)
|
outputs,
|
||||||
for model in self.small_models
|
[
|
||||||
]
|
{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
|
||||||
return question_answering_pipelines
|
{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
question_answerer(question="", context="HuggingFace was founded in Paris.")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
question_answerer(question=None, context="HuggingFace was founded in Paris.")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
question_answerer(question="In what field is HuggingFace working ?", context="")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
question_answerer(question="In what field is HuggingFace working ?", context=None)
|
||||||
|
|
||||||
|
outputs = question_answerer(
|
||||||
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris.", topk=20
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
outputs, [{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)} for i in range(20)]
|
||||||
|
)
|
||||||
|
|
||||||
|
@require_torch
|
||||||
|
def test_small_model_pt(self):
|
||||||
|
question_answerer = pipeline(
|
||||||
|
"question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad"
|
||||||
|
)
|
||||||
|
outputs = question_answerer(
|
||||||
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(nested_simplify(outputs), {"score": 0.01, "start": 0, "end": 11, "answer": "HuggingFace"})
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
def test_small_model_tf(self):
|
||||||
|
question_answerer = pipeline(
|
||||||
|
"question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad", framework="tf"
|
||||||
|
)
|
||||||
|
outputs = question_answerer(
|
||||||
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(nested_simplify(outputs), {"score": 0.011, "start": 0, "end": 11, "answer": "HuggingFace"})
|
||||||
|
|
||||||
@slow
|
@slow
|
||||||
@unittest.skipIf(not is_torch_available() and not is_tf_available(), "Either torch or TF must be installed.")
|
@require_torch
|
||||||
def test_high_topk_small_context(self):
|
def test_large_model_pt(self):
|
||||||
self.pipeline_running_kwargs.update({"topk": 20})
|
question_answerer = pipeline(
|
||||||
valid_inputs = [
|
"question-answering",
|
||||||
{"question": "Where was HuggingFace founded ?", "context": "Paris"},
|
)
|
||||||
]
|
outputs = question_answerer(
|
||||||
question_answering_pipelines = self.get_pipelines()
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
|
||||||
output_keys = {"score", "answer", "start", "end"}
|
)
|
||||||
for question_answering_pipeline in question_answering_pipelines:
|
|
||||||
result = question_answering_pipeline(valid_inputs, **self.pipeline_running_kwargs)
|
|
||||||
self.assertIsInstance(result, dict)
|
|
||||||
|
|
||||||
for key in output_keys:
|
self.assertEqual(nested_simplify(outputs), {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"})
|
||||||
self.assertIn(key, result)
|
|
||||||
|
|
||||||
def _test_pipeline(self, question_answering_pipeline: Pipeline):
|
@slow
|
||||||
output_keys = {"score", "answer", "start", "end"}
|
@require_tf
|
||||||
valid_inputs = [
|
def test_large_model_tf(self):
|
||||||
{"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
|
question_answerer = pipeline("question-answering", framework="tf")
|
||||||
{
|
outputs = question_answerer(
|
||||||
"question": "In what field is HuggingFace working ?",
|
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
|
||||||
"context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
|
)
|
||||||
},
|
|
||||||
]
|
|
||||||
invalid_inputs = [
|
|
||||||
{"question": "", "context": "This is a test to try empty question edge case"},
|
|
||||||
{"question": None, "context": "This is a test to try empty question edge case"},
|
|
||||||
{"question": "What is does with empty context ?", "context": ""},
|
|
||||||
{"question": "What is does with empty context ?", "context": None},
|
|
||||||
]
|
|
||||||
self.assertIsNotNone(question_answering_pipeline)
|
|
||||||
|
|
||||||
mono_result = question_answering_pipeline(valid_inputs[0])
|
self.assertEqual(nested_simplify(outputs), {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"})
|
||||||
self.assertIsInstance(mono_result, dict)
|
|
||||||
|
|
||||||
for key in output_keys:
|
|
||||||
self.assertIn(key, mono_result)
|
|
||||||
|
|
||||||
multi_result = question_answering_pipeline(valid_inputs)
|
|
||||||
self.assertIsInstance(multi_result, list)
|
|
||||||
self.assertIsInstance(multi_result[0], dict)
|
|
||||||
|
|
||||||
for result in multi_result:
|
|
||||||
for key in output_keys:
|
|
||||||
self.assertIn(key, result)
|
|
||||||
for bad_input in invalid_inputs:
|
|
||||||
self.assertRaises(ValueError, question_answering_pipeline, bad_input)
|
|
||||||
self.assertRaises(ValueError, question_answering_pipeline, invalid_inputs)
|
|
||||||
|
|
||||||
|
@is_pipeline_test
|
||||||
|
class QuestionAnsweringArgumentHandlerTests(unittest.TestCase):
|
||||||
def test_argument_handler(self):
|
def test_argument_handler(self):
|
||||||
qa = QuestionAnsweringArgumentHandler()
|
qa = QuestionAnsweringArgumentHandler()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user