From 8b05ace0145315d00f1e4b8bc8329433df650839 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Wed, 22 Mar 2023 20:02:24 +0100
Subject: [PATCH] Fix PipelineTests skip conditions (#22320)

* check what tests fail

* Skip failing tests

* Skip failing tests

* Skip failing tests

* Skip failing tests

* clean up

* clean up

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 tests/models/cpm/test_tokenization_cpm.py     |  6 +++++
 tests/models/luke/test_modeling_luke.py       |  9 +++++++
 .../models/markuplm/test_modeling_markuplm.py |  8 ++++++
 tests/models/mbart/test_modeling_mbart.py     | 10 +++++++
 tests/models/mbart/test_modeling_tf_mbart.py  | 10 +++++++
 tests/models/whisper/test_modeling_whisper.py | 11 ++++++++
 tests/models/xlnet/test_modeling_tf_xlnet.py  |  7 +++++
 tests/models/xlnet/test_modeling_xlnet.py     |  7 +++++
 tests/test_pipeline_mixin.py                  | 26 +++++++++----------
 9 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/tests/models/cpm/test_tokenization_cpm.py b/tests/models/cpm/test_tokenization_cpm.py
index 1d66778b8c..fa69a6aaa7 100644
--- a/tests/models/cpm/test_tokenization_cpm.py
+++ b/tests/models/cpm/test_tokenization_cpm.py
@@ -21,6 +21,12 @@ from ..xlnet.test_modeling_xlnet import XLNetModelTest
 
 @custom_tokenizers
 class CpmTokenizationTest(XLNetModelTest):
+    # There is no `CpmModel`
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        return True
+
     def test_pre_tokenization(self):
         tokenizer = CpmTokenizer.from_pretrained("TsinghuaAI/CPM-Generate")
         text = "Hugging Face大法好，谁用谁知道。"
diff --git a/tests/models/luke/test_modeling_luke.py b/tests/models/luke/test_modeling_luke.py
index 1ab23392da..35bdb6b6d5 100644
--- a/tests/models/luke/test_modeling_luke.py
+++ b/tests/models/luke/test_modeling_luke.py
@@ -619,6 +619,15 @@ class LukeModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     test_resize_embeddings = True
     test_head_masking = True
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name in ["QAPipelineTests", "ZeroShotClassificationPipelineTests"]:
+            return True
+
+        return False
+
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         entity_inputs_dict = {k: v for k, v in inputs_dict.items() if k.startswith("entity")}
         inputs_dict = {k: v for k, v in inputs_dict.items() if not k.startswith("entity")}
diff --git a/tests/models/markuplm/test_modeling_markuplm.py b/tests/models/markuplm/test_modeling_markuplm.py
index 3abdb4041a..d52129cf2c 100644
--- a/tests/models/markuplm/test_modeling_markuplm.py
+++ b/tests/models/markuplm/test_modeling_markuplm.py
@@ -299,6 +299,14 @@ class MarkupLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
         else {}
     )
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        # ValueError: Nodes must be of type `List[str]` (single pretokenized example), or `List[List[str]]`
+        # (batch of pretokenized examples).
+        return True
+
     def setUp(self):
         self.model_tester = MarkupLMModelTester(self)
         self.config_tester = ConfigTester(self, config_class=MarkupLMConfig, hidden_size=37)
diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py
index b52d2f04d0..607babb1ba 100644
--- a/tests/models/mbart/test_modeling_mbart.py
+++ b/tests/models/mbart/test_modeling_mbart.py
@@ -252,6 +252,16 @@ class MBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     test_pruning = False
     test_missing_keys = False
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name != "FeatureExtractionPipelineTests":
+            # IndexError: index out of range in self
+            return True
+
+        return False
+
     def setUp(self):
         self.model_tester = MBartModelTester(self)
         self.config_tester = ConfigTester(self, config_class=MBartConfig)
diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py
index 52cd24be27..c3e5721473 100644
--- a/tests/models/mbart/test_modeling_tf_mbart.py
+++ b/tests/models/mbart/test_modeling_tf_mbart.py
@@ -198,6 +198,16 @@ class TFMBartModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
     test_pruning = False
     test_onnx = False
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name != "FeatureExtractionPipelineTests":
+            # Exception encountered when calling layer '...'
+            return True
+
+        return False
+
     def setUp(self):
         self.model_tester = TFMBartModelTester(self)
         self.config_tester = ConfigTester(self, config_class=MBartConfig)
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index 8524c5b42c..f0ba1a00f5 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -291,6 +291,17 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
 
     input_name = "input_features"
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        if pipeline_test_casse_name == "AutomaticSpeechRecognitionPipelineTests":
+            # RuntimeError: The size of tensor a (1500) must match the size of tensor b (30) at non-singleton
+            # dimension 1
+            return True
+
+        return False
+
     def setUp(self):
         self.model_tester = WhisperModelTester(self)
         self.config_tester = ConfigTester(self, config_class=WhisperConfig)
diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py
index bc65b0501e..bbc310aa8b 100644
--- a/tests/models/xlnet/test_modeling_tf_xlnet.py
+++ b/tests/models/xlnet/test_modeling_tf_xlnet.py
@@ -363,6 +363,13 @@ class TFXLNetModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
     test_head_masking = False
     test_onnx = False
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        # Exception encountered when calling layer '...'
+        return True
+
     def setUp(self):
         self.model_tester = TFXLNetModelTester(self)
         self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
diff --git a/tests/models/xlnet/test_modeling_xlnet.py b/tests/models/xlnet/test_modeling_xlnet.py
index 2b99d2e17c..98c935cdec 100644
--- a/tests/models/xlnet/test_modeling_xlnet.py
+++ b/tests/models/xlnet/test_modeling_xlnet.py
@@ -542,6 +542,13 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
     fx_compatible = False
     test_pruning = False
 
+    # TODO: Fix the failed tests
+    def is_pipeline_test_to_skip(
+        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
+    ):
+        # IndexError: index out of range in self
+        return True
+
     # XLNet has 2 QA models -> need to manually set the correct labels for one of them here
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
         inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py
index fbeac086e1..cfe10ea36a 100644
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -28,7 +28,7 @@ from transformers.testing_utils import (
     require_torch_or_tf,
     require_vision,
 )
-from transformers.utils import direct_transformers_import
+from transformers.utils import direct_transformers_import, logging
 
 from .pipelines.test_pipelines_audio_classification import AudioClassificationPipelineTests
 from .pipelines.test_pipelines_automatic_speech_recognition import AutomaticSpeechRecognitionPipelineTests
@@ -104,6 +104,8 @@ PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent, "src/transform
 # Dynamically import the Transformers module to grab the attribute classes of the processor form their names.
 transformers_module = direct_transformers_import(PATH_TO_TRANSFORMERS)
 
+logger = logging.get_logger(__name__)
+
 
 class PipelineTesterMixin:
     model_tester = None
@@ -179,11 +181,12 @@ class PipelineTesterMixin:
                     tokenizer_name,
                     processor_name,
                 ):
-                    self.skipTest(
+                    logger.warning(
                         f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
                         f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                         f"`{tokenizer_name}` | processor `{processor_name}`."
                     )
+                    continue
                 self.run_pipeline_test(task, repo_name, model_architecture, tokenizer_name, processor_name)
 
     def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor_name):
@@ -217,26 +220,29 @@ class PipelineTesterMixin:
             try:
                 processor = processor_class.from_pretrained(repo_id)
             except Exception:
-                self.skipTest(
+                logger.warning(
                     f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not load the "
                     f"processor from `{repo_id}` with `{processor_name}`."
                 )
+                return
 
         # TODO: Maybe not upload such problematic tiny models to Hub.
         if tokenizer is None and processor is None:
-            self.skipTest(
+            logger.warning(
                 f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
                 f"any tokenizer / processor from `{repo_id}`."
             )
+            return
 
         # TODO: We should check if a model file is on the Hub repo. instead.
         try:
             model = model_architecture.from_pretrained(repo_id)
         except Exception:
-            self.skipTest(
+            logger.warning(
                 f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
                 f"the model from `{repo_id}` with `{model_architecture}`."
             )
+            return
 
         # validate
         validate_test_components(self, task, model, tokenizer, processor)
@@ -252,10 +258,11 @@ class PipelineTesterMixin:
         if pipeline is None:
             # The test can disable itself, but it should be very marginal
             # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
-            self.skipTest(
+            logger.warning(
                 f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not get the "
                 "pipeline for testing."
             )
+            return
 
         task_test.run_pipeline_test(pipeline, examples)
 
@@ -429,10 +436,3 @@ def validate_test_components(test_case, task, model, tokenizer, processor):
             raise ValueError(
                 "Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`."
             )
-        # TODO: Remove tiny models from the Hub which have problematic tokenizers (but still keep this block)
-        if config_vocab_size is not None and len(tokenizer) > config_vocab_size:
-            test_case.skipTest(
-                f"{test_case.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: tokenizer "
-                f"(`{tokenizer.__class__.__name__}`) has {len(tokenizer)} tokens which is greater than "
-                f"`config_vocab_size` ({config_vocab_size}). Something is wrong."
-            )