Fixing Dataset for TQA + token-classification. (#14658)

* Fixing Dataset for TQA + token-classification.

* Fixing the tests.

* Making sure `offset_mappings` is a valid argument.
This commit is contained in:
Nicolas Patry
2021-12-08 09:54:24 +01:00
committed by GitHub
parent fae0b9faef
commit 2e12d90b9e
5 changed files with 58 additions and 20 deletions

View File

@@ -183,9 +183,12 @@ class PipelineTestCaseMeta(type):
# 10 examples with batch size 4 means there needs to be a unfinished batch
# which is important for the unbatcher
dataset = [copy.deepcopy(random.choice(examples)) for i in range(10)]
def data(n):
for _ in range(n):
# Need to copy because Conversation object is mutated
yield copy.deepcopy(random.choice(examples))
for item in pipeline(dataset, batch_size=4):
for item in pipeline(data(10), batch_size=4):
pass
run_batch_test(pipeline, examples)

View File

@@ -35,17 +35,16 @@ from transformers.testing_utils import (
from .test_pipelines_common import PipelineTestCaseMeta
@require_tensorflow_probability
@require_torch_scatter
@require_torch
@require_pandas
@is_pipeline_test
class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
# Putting it there for consistency, but TQA do not have fast tokenizer
# which are needed to generate automatic tests
model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING
@require_tensorflow_probability
@require_pandas
@require_tf
@require_torch
def test_small_model_tf(self):
model_id = "lysandre/tiny-tapas-random-wtq"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
@@ -147,6 +146,7 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
)
@require_torch
@require_torch_scatter
def test_small_model_pt(self):
model_id = "lysandre/tiny-tapas-random-wtq"
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
@@ -248,6 +248,7 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
)
@require_torch
@require_torch_scatter
def test_slow_tokenizer_sqa_pt(self):
model_id = "lysandre/tiny-tapas-random-sqa"
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
@@ -366,6 +367,9 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
)
@require_tf
@require_tensorflow_probability
@require_pandas
@require_torch
def test_slow_tokenizer_sqa_tf(self):
model_id = "lysandre/tiny-tapas-random-sqa"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True)
@@ -484,6 +488,7 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
)
@slow
@require_torch_scatter
def test_integration_wtq_pt(self):
table_querier = pipeline("table-question-answering")
@@ -528,6 +533,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
self.assertListEqual(results, expected_results)
@slow
@require_tensorflow_probability
@require_pandas
def test_integration_wtq_tf(self):
model_id = "google/tapas-base-finetuned-wtq"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
@@ -575,6 +582,7 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
self.assertListEqual(results, expected_results)
@slow
@require_torch_scatter
def test_integration_sqa_pt(self):
table_querier = pipeline(
"table-question-answering",
@@ -598,6 +606,8 @@ class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
self.assertListEqual(results, expected_results)
@slow
@require_tensorflow_probability
@require_pandas
def test_integration_sqa_tf(self):
model_id = "google/tapas-base-finetuned-sqa"
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)

View File

@@ -636,6 +636,19 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
[],
)
token_classifier = pipeline(task="token-classification", model=model_name, framework="pt")
# Overload offset_mapping
outputs = token_classifier(
"This is a test !", offset_mapping=[(0, 0), (0, 1), (0, 2), (0, 0), (0, 0), (0, 0), (0, 0)]
)
self.assertEqual(
nested_simplify(outputs),
[
{"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": 0, "end": 1},
{"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": 0, "end": 2},
],
)
@require_torch
def test_pt_ignore_subwords_slow_tokenizer_raises(self):
model_name = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"