Don't use LayoutLMv2 and LayoutLMv3 in some pipeline tests (#22774)

* fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2023-04-17 17:45:20 +02:00
parent ea7b0a539a
commit 5269718cb7
6 changed files with 39 additions and 26 deletions
--- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -273,33 +273,11 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        {
            "document-question-answering": LayoutLMv2ForQuestionAnswering,
            "feature-extraction": LayoutLMv2Model,
            "question-answering": LayoutLMv2ForQuestionAnswering,
            "text-classification": LayoutLMv2ForSequenceClassification,
            "token-classification": LayoutLMv2ForTokenClassification,
            "zero-shot": LayoutLMv2ForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Decide whether a pipeline test case should be skipped for this model test class.

        Returns `True` for the text-only pipeline test cases listed below; for every other test case name the
        decision is delegated to the parent class, with all arguments forwarded unchanged.
        """
        always_skipped_test_cases = (
            "QAPipelineTests",
            "TextClassificationPipelineTests",
            "TokenClassificationPipelineTests",
            "ZeroShotClassificationPipelineTests",
        )

        if pipeline_test_casse_name not in always_skipped_test_cases:
            return super().is_pipeline_test_to_skip(
                pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
            )

        # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
        # config. With new tiny model creation, it is available, but we need to fix the failed tests.
        return True
    def setUp(self):
        self.model_tester = LayoutLMv2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37)
--- a/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_modeling_layoutlmv3.py
@@ -289,10 +289,6 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        {
            "document-question-answering": LayoutLMv3ForQuestionAnswering,
            "feature-extraction": LayoutLMv3Model,
            "question-answering": LayoutLMv3ForQuestionAnswering,
            "text-classification": LayoutLMv3ForSequenceClassification,
            "token-classification": LayoutLMv3ForTokenClassification,
            "zero-shot": LayoutLMv3ForSequenceClassification,
        }
        if is_torch_available()
        else {}
@@ -302,6 +298,10 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Skip every pipeline test case for this model test class (always returns `True`).

        `DocumentQuestionAnsweringPipeline` is expected to work with this model, but the model combines the text
        and visual embedding along the sequence dimension (dim 1), which causes an error during post-processing as
        `p_mask` has the sequence dimension of the text embedding only
        (see the line `embedding_output = torch.cat([embedding_output, visual_embeddings], dim=1)`).

        None of the arguments are inspected.
        """
        skip_test = True
        return skip_test
    def setUp(self):
--- a/tests/pipelines/test_pipelines_question_answering.py
+++ b/tests/pipelines/test_pipelines_question_answering.py
@@ -34,11 +34,20 @@ from transformers.testing_utils import (
 from .test_pipelines_common import ANY
 # These 2 model types require different inputs than those of the usual text models.
 _TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
 class QAPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
    tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING

    # Exclude the config classes named in `_TO_SKIP` from the mappings: those model types require different
    # inputs than the usual text models. The filter must be `not in`; using `in` would keep *only* the models
    # that are meant to be skipped and drop every regular text model.
    if model_mapping is not None:
        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
    if tf_model_mapping is not None:
        tf_model_mapping = {
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }
    def get_test_pipeline(self, model, tokenizer, processor):
        if isinstance(model.config, LxmertConfig):
            # This is a bimodal model, we need to find a more consistent way
--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -25,11 +25,20 @@ from transformers.testing_utils import is_pipeline_test, nested_simplify, requir
 from .test_pipelines_common import ANY
 # These 2 model types require different inputs than those of the usual text models.
 _TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
 class TextClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING

    # Exclude the config classes named in `_TO_SKIP` from the mappings: those model types require different
    # inputs than the usual text models. The filter must be `not in`; using `in` would keep *only* the models
    # that are meant to be skipped and drop every regular text model.
    if model_mapping is not None:
        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
    if tf_model_mapping is not None:
        tf_model_mapping = {
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }
    @require_torch
    def test_small_model_pt(self):
        text_classifier = pipeline(
--- a/tests/pipelines/test_pipelines_token_classification.py
+++ b/tests/pipelines/test_pipelines_token_classification.py
@@ -39,12 +39,20 @@ from .test_pipelines_common import ANY
 VALID_INPUTS = ["A simple string", ["list of strings", "A simple string that is quite a bit longer"]]
 # These 2 model types require different inputs than those of the usual text models.
 _TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
 class TokenClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING

    # Exclude the config classes named in `_TO_SKIP` from the mappings: those model types require different
    # inputs than the usual text models. The filter must be `not in`; using `in` would keep *only* the models
    # that are meant to be skipped and drop every regular text model.
    if model_mapping is not None:
        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
    if tf_model_mapping is not None:
        tf_model_mapping = {
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }
    def get_test_pipeline(self, model, tokenizer, processor):
        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -26,11 +26,20 @@ from transformers.testing_utils import is_pipeline_test, nested_simplify, requir
 from .test_pipelines_common import ANY
 # These 2 model types require different inputs than those of the usual text models.
 _TO_SKIP = {"LayoutLMv2Config", "LayoutLMv3Config"}
@is_pipeline_test
 class ZeroShotClassificationPipelineTests(unittest.TestCase):
    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING

    # Exclude the config classes named in `_TO_SKIP` from the mappings: those model types require different
    # inputs than the usual text models. The filter must be `not in`; using `in` would keep *only* the models
    # that are meant to be skipped and drop every regular text model.
    if model_mapping is not None:
        model_mapping = {config: model for config, model in model_mapping.items() if config.__name__ not in _TO_SKIP}
    if tf_model_mapping is not None:
        tf_model_mapping = {
            config: model for config, model in tf_model_mapping.items() if config.__name__ not in _TO_SKIP
        }
    def get_test_pipeline(self, model, tokenizer, processor):
        classifier = ZeroShotClassificationPipeline(
            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]