Don't use LayoutLMv2 and LayoutLMv3 in some pipeline tests (#22774)

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-04-17 17:45:20 +02:00
committed by GitHub
parent ea7b0a539a
commit 5269718cb7
6 changed files with 39 additions and 26 deletions

View File

@@ -273,33 +273,11 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
{ {
"document-question-answering": LayoutLMv2ForQuestionAnswering, "document-question-answering": LayoutLMv2ForQuestionAnswering,
"feature-extraction": LayoutLMv2Model, "feature-extraction": LayoutLMv2Model,
"question-answering": LayoutLMv2ForQuestionAnswering,
"text-classification": LayoutLMv2ForSequenceClassification,
"token-classification": LayoutLMv2ForTokenClassification,
"zero-shot": LayoutLMv2ForSequenceClassification,
} }
if is_torch_available() if is_torch_available()
else {} else {}
) )
# TODO: Fix the failed tests
# Pipeline-test filter hook: returns True (i.e. skip) when the pipeline test class name is one of the
# four generic text pipeline test classes listed below; for any other name it defers to the parent
# class's `is_pipeline_test_to_skip` with the same arguments.
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name in [
"QAPipelineTests",
"TextClassificationPipelineTests",
"TokenClassificationPipelineTests",
"ZeroShotClassificationPipelineTests",
]:
# `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
# config. With new tiny model creation, it is available, but we need to fix the failed tests.
return True
# Not one of the explicitly skipped test classes: let the parent implementation decide.
return super().is_pipeline_test_to_skip(
pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
)
def setUp(self): def setUp(self):
self.model_tester = LayoutLMv2ModelTester(self) self.model_tester = LayoutLMv2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37) self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37)

View File

@@ -289,10 +289,6 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
{ {
"document-question-answering": LayoutLMv3ForQuestionAnswering, "document-question-answering": LayoutLMv3ForQuestionAnswering,
"feature-extraction": LayoutLMv3Model, "feature-extraction": LayoutLMv3Model,
"question-answering": LayoutLMv3ForQuestionAnswering,
"text-classification": LayoutLMv3ForSequenceClassification,
"token-classification": LayoutLMv3ForTokenClassification,
"zero-shot": LayoutLMv3ForSequenceClassification,
} }
if is_torch_available() if is_torch_available()
else {} else {}
@@ -302,6 +298,10 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
def is_pipeline_test_to_skip( def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
): ):
# `DocumentQuestionAnsweringPipeline` is expected to work with this model, but it combines the text and visual
# embedding along the sequence dimension (dim 1), which causes an error during post-processing as `p_mask` has
# the sequence dimension of the text embedding only.
# (see the line `embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)`)
return True return True
def setUp(self): def setUp(self):

View File

@@ -34,11 +34,20 @@ from transformers.testing_utils import (
from .test_pipelines_common import ANY from .test_pipelines_common import ANY
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test @is_pipeline_test
class QAPipelineTests(unittest.TestCase): class QAPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
# Remove the config classes named in `_TO_SKIP` from both mappings, so that this pipeline test class
# is never run with model types that need different inputs than the usual text models.
# The filter must be `not in`: using `in` would keep *only* the model types meant to be skipped.
if model_mapping is not None:
    model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
if tf_model_mapping is not None:
    tf_model_mapping = {
        config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
    }
def get_test_pipeline(self, model, tokenizer, processor): def get_test_pipeline(self, model, tokenizer, processor):
if isinstance(model.config, LxmertConfig): if isinstance(model.config, LxmertConfig):
# This is an bimodal model, we need to find a more consistent way # This is an bimodal model, we need to find a more consistent way

View File

@@ -25,11 +25,20 @@ from transformers.testing_utils import is_pipeline_test, nested_simplify, requir
from .test_pipelines_common import ANY from .test_pipelines_common import ANY
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test @is_pipeline_test
class TextClassificationPipelineTests(unittest.TestCase): class TextClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
# Remove the config classes named in `_TO_SKIP` from both mappings, so that this pipeline test class
# is never run with model types that need different inputs than the usual text models.
# The filter must be `not in`: using `in` would keep *only* the model types meant to be skipped.
if model_mapping is not None:
    model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
if tf_model_mapping is not None:
    tf_model_mapping = {
        config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
    }
@require_torch @require_torch
def test_small_model_pt(self): def test_small_model_pt(self):
text_classifier = pipeline( text_classifier = pipeline(

View File

@@ -39,12 +39,20 @@ from .test_pipelines_common import ANY
VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]] VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]]
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test @is_pipeline_test
class TokenClassificationPipelineTests(unittest.TestCase): class TokenClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
# Remove the config classes named in `_TO_SKIP` from both mappings, so that this pipeline test class
# is never run with model types that need different inputs than the usual text models.
# The filter must be `not in`: using `in` would keep *only* the model types meant to be skipped.
if model_mapping is not None:
    model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
if tf_model_mapping is not None:
    tf_model_mapping = {
        config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
    }
def get_test_pipeline(self, model, tokenizer, processor): def get_test_pipeline(self, model, tokenizer, processor):
token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer) token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
return token_classifier, ["A simple string", "A simple string that is quite a bit longer"] return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]

View File

@@ -26,11 +26,20 @@ from transformers.testing_utils import is_pipeline_test, nested_simplify, requir
from .test_pipelines_common import ANY from .test_pipelines_common import ANY
# These 2 model types require different inputs than those of the usual text models.
_TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test @is_pipeline_test
class ZeroShotClassificationPipelineTests(unittest.TestCase): class ZeroShotClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
# Remove the config classes named in `_TO_SKIP` from both mappings, so that this pipeline test class
# is never run with model types that need different inputs than the usual text models.
# The filter must be `not in`: using `in` would keep *only* the model types meant to be skipped.
if model_mapping is not None:
    model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
if tf_model_mapping is not None:
    tf_model_mapping = {
        config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
    }
def get_test_pipeline(self, model, tokenizer, processor): def get_test_pipeline(self, model, tokenizer, processor):
classifier = ZeroShotClassificationPipeline( classifier = ZeroShotClassificationPipeline(
model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"] model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]