🔥Rework pipeline testing by removing PipelineTestCaseMeta 🚀 (#21516)

* Add PipelineTesterMixin * remove class PipelineTestCaseMeta * move validate_test_components * Add for ViT * Add to SPECIAL_MODULE_TO_TEST_MAP * style and quality * Add feature-extraction * update * raise instead of skip * add tiny_model_summary.json * more explicit * skip tasks not in mapping * add availability check * Add Copyright * A way to diable irrelevant tests * update with main * remove disable_irrelevant_tests * skip tests * better skip message * better skip message * Add all pipeline task tests * revert * Import PipelineTesterMixin * subclass test classes with PipelineTesterMixin * Add pipieline_model_mapping * Fix import after adding pipieline_model_mapping * Fix style and quality after adding pipieline_model_mapping * Fix one more import after adding pipieline_model_mapping * Fix style and quality after adding pipieline_model_mapping * Fix test issues * Fix import requirements * Fix mapping for MobileViTModelTest * Update * Better skip message * pipieline_model_mapping could not be None * Remove some PipelineTesterMixin * Fix typo * revert tests_fetcher.py * update * rename * revert * Remove PipelineTestCaseMeta from ZeroShotAudioClassificationPipelineTests * style and quality * test fetcher for all pipeline/model tests --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2023-02-28 19:40:57 +01:00
parent 4cb5ffa93d
commit 871c31a6f1
243 changed files with 5871 additions and 523 deletions
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -12,20 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import copy
 import logging
 import os
-import random
 import sys
 import tempfile
 import unittest
-from abc import abstractmethod
 from pathlib import Path
-from unittest import skipIf

 import datasets
 import numpy as np
-import requests
 from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
 from requests.exceptions import HTTPError

@@ -82,255 +77,6 @@ class ANY:
        return f"ANY({', '.join(_type.__name__ for _type in self._types)})"


-def is_test_to_skip(test_casse_name, config_class, model_architecture, tokenizer_name, processor_name):
-    """Some tests are just not working"""
-
-    to_skip = False
-
-    if config_class.__name__ == "RoCBertConfig" and test_casse_name in [
-        "FillMaskPipelineTests",
-        "FeatureExtractionPipelineTests",
-        "TextClassificationPipelineTests",
-        "TokenClassificationPipelineTests",
-    ]:
-        # Get error: IndexError: index out of range in self.
-        # `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
-        # otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
-        # more usage.
-        to_skip = True
-    elif config_class.__name__ in ["LayoutLMv3Config", "LiltConfig"]:
-        # Get error: ValueError: Words must be of type `List[str]`. Previously, `LayoutLMv3` is not
-        # used in pipeline tests as it could not find a checkpoint
-        # TODO: check and fix if possible
-        to_skip = True
-    # config/model class we decide to skip
-    elif config_class.__name__ in ["TapasConfig"]:
-        # Get error: AssertionError: Table must be of type pd.DataFrame. Also, the tiny model has large
-        # vocab size as the fast tokenizer could not be converted. Previous, `Tapas` is not used in
-        # pipeline tests due to the same reason.
-        # TODO: check and fix if possible
-        to_skip = True
-
-    # TODO: check and fix if possible
-    if not to_skip and tokenizer_name is not None:
-        if (
-            test_casse_name == "QAPipelineTests"
-            and not tokenizer_name.endswith("Fast")
-            and config_class.__name__
-            in [
-                "FlaubertConfig",
-                "GPTJConfig",
-                "LongformerConfig",
-                "MvpConfig",
-                "OPTConfig",
-                "ReformerConfig",
-                "XLMConfig",
-            ]
-        ):
-            # `QAPipelineTests` fails for a few models when the slower tokenizer are used.
-            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
-            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
-            to_skip = True
-        elif test_casse_name == "ZeroShotClassificationPipelineTests" and config_class.__name__ in [
-            "CTRLConfig",
-            "OpenAIGPTConfig",
-        ]:
-            # Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
-            # `CTRLConfig` and `OpenAIGPTConfig` were never used in pipeline tests, either because of a missing
-            # checkpoint or because a tiny config could not be created
-            to_skip = True
-        elif test_casse_name == "TranslationPipelineTests" and config_class.__name__ in [
-            "M2M100Config",
-            "PLBartConfig",
-        ]:
-            # Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
-            # `M2M100Config` and `PLBartConfig` were never used in pipeline tests: cannot create a simple tokenizer
-            to_skip = True
-        elif test_casse_name == "TextGenerationPipelineTests" and config_class.__name__ in [
-            "ProphetNetConfig",
-            "TransfoXLConfig",
-        ]:
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `TransfoXLConfig` and `ProphetNetConfig` were never used in pipeline tests: cannot create a simple
-            # tokenizer.
-            to_skip = True
-        elif test_casse_name == "FillMaskPipelineTests" and config_class.__name__ in [
-            "FlaubertConfig",
-            "XLMConfig",
-        ]:
-            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
-            # `FlaubertConfig` and `TransfoXLConfig` were never used in pipeline tests: cannot create a simple
-            # tokenizer
-            to_skip = True
-        elif test_casse_name == "TextGenerationPipelineTests" and model_architecture.__name__ in [
-            "TFRoFormerForCausalLM"
-        ]:
-            # TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
-            to_skip = True
-        elif test_casse_name == "QAPipelineTests" and model_architecture.__name__ in ["FNetForQuestionAnswering"]:
-            # TODO: The change in `base.py` in the PR #21132 (https://github.com/huggingface/transformers/pull/21132)
-            #       fails this test case. Skip for now - a fix for this along with the initial changes in PR #20426 is
-            #       too much. Let `ydshieh` to fix it ASAP once #20426 is merged.
-            to_skip = True
-        elif config_class.__name__ == "LayoutLMv2Config" and test_casse_name in [
-            "QAPipelineTests",
-            "TextClassificationPipelineTests",
-            "TokenClassificationPipelineTests",
-            "ZeroShotClassificationPipelineTests",
-        ]:
-            # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
-            # config. With new tiny model creation, it is available, but we need to fix the failed tests.
-            to_skip = True
-        elif test_casse_name == "DocumentQuestionAnsweringPipelineTests" and not tokenizer_name.endswith("Fast"):
-            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
-            to_skip = True
-
-    return to_skip
-
-
-def validate_test_components(test_case, model, tokenizer, processor):
-    # TODO: Move this to tiny model creation script
-    # head-specific (within a model type) necessary changes to the config
-    # 1. for `BlenderbotForCausalLM`
-    if model.__class__.__name__ == "BlenderbotForCausalLM":
-        model.config.encoder_no_repeat_ngram_size = 0
-
-    # TODO: Change the tiny model creation script: don't create models with problematic tokenizers
-    # Avoid `IndexError` in embedding layers
-    CONFIG_WITHOUT_VOCAB_SIZE = ["CanineConfig"]
-    if tokenizer is not None:
-        config_vocab_size = getattr(model.config, "vocab_size", None)
-        # For CLIP-like models
-        if config_vocab_size is None and hasattr(model.config, "text_config"):
-            config_vocab_size = getattr(model.config.text_config, "vocab_size", None)
-        if config_vocab_size is None and model.config.__class__.__name__ not in CONFIG_WITHOUT_VOCAB_SIZE:
-            raise ValueError(
-                "Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`."
-            )
-        # TODO: Remove tiny models from the Hub which have problematic tokenizers (but still keep this block)
-        if config_vocab_size is not None and len(tokenizer) > config_vocab_size:
-            test_case.skipTest(
-                f"Ignore {model.__class__.__name__}: `tokenizer` ({tokenizer.__class__.__name__}) has"
-                f" {len(tokenizer)} tokens which is greater than `config_vocab_size`"
-                f" ({config_vocab_size}). Something is wrong."
-            )
-
-
-class PipelineTestCaseMeta(type):
-    def __new__(mcs, name, bases, dct):
-        def gen_test(repo_name, model_architecture, tokenizer_name, processor_name):
-            @skipIf(
-                tokenizer_name is None and processor_name is None,
-                f"Ignore {model_architecture.__name__}: no processor class is provided (tokenizer, image processor,"
-                " feature extractor, etc)",
-            )
-            def test(self):
-                repo_id = f"hf-internal-testing/{repo_name}"
-
-                tokenizer = None
-                if tokenizer_name is not None:
-                    tokenizer_class = getattr(transformers_module, tokenizer_name)
-                    tokenizer = tokenizer_class.from_pretrained(repo_id)
-
-                processor = None
-                if processor_name is not None:
-                    processor_class = getattr(transformers_module, processor_name)
-                    # If the required packages (like `Pillow`) are not installed, this will fail.
-                    try:
-                        processor = processor_class.from_pretrained(repo_id)
-                    except Exception:
-                        self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
-
-                try:
-                    model = model_architecture.from_pretrained(repo_id)
-                except Exception:
-                    self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
-
-                # validate
-                validate_test_components(self, model, tokenizer, processor)
-
-                if hasattr(model, "eval"):
-                    model = model.eval()
-
-                pipeline, examples = self.get_test_pipeline(model, tokenizer, processor)
-                if pipeline is None:
-                    # The test can disable itself, but it should be very marginal
-                    # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
-                    self.skipTest(f"Ignore {model_architecture.__name__}: could not create the pipeline")
-                self.run_pipeline_test(pipeline, examples)
-
-                def run_batch_test(pipeline, examples):
-                    # Need to copy because `Conversation` are stateful
-                    if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
-                        return  # No batching for this and it's OK
-
-                    # 10 examples with batch size 4 means there needs to be a unfinished batch
-                    # which is important for the unbatcher
-                    def data(n):
-                        for _ in range(n):
-                            # Need to copy because Conversation object is mutated
-                            yield copy.deepcopy(random.choice(examples))
-
-                    out = []
-                    for item in pipeline(data(10), batch_size=4):
-                        out.append(item)
-                    self.assertEqual(len(out), 10)
-
-                run_batch_test(pipeline, examples)
-
-            return test
-
-        # Download tiny model summary (used to avoid requesting from Hub too many times)
-        url = "https://huggingface.co/datasets/hf-internal-testing/tiny-random-model-summary/raw/main/processor_classes.json"
-        tiny_model_summary = requests.get(url).json()
-
-        for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
-            mapping = dct.get(key, {})
-            if mapping:
-                for config_class, model_architectures in mapping.items():
-                    if not isinstance(model_architectures, tuple):
-                        model_architectures = (model_architectures,)
-
-                    for model_architecture in model_architectures:
-                        model_arch_name = model_architecture.__name__
-                        # Get the canonical name
-                        for _prefix in ["Flax", "TF"]:
-                            if model_arch_name.startswith(_prefix):
-                                model_arch_name = model_arch_name[len(_prefix) :]
-                                break
-
-                        tokenizer_names = []
-                        processor_names = []
-                        if model_arch_name in tiny_model_summary:
-                            tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
-                            processor_names = tiny_model_summary[model_arch_name]["processor_classes"]
-                        # Adding `None` (if empty) so we can generate tests
-                        tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
-                        processor_names = [None] if len(processor_names) == 0 else processor_names
-
-                        repo_name = f"tiny-random-{model_arch_name}"
-                        for tokenizer_name in tokenizer_names:
-                            for processor_name in processor_names:
-                                if is_test_to_skip(
-                                    name, config_class, model_architecture, tokenizer_name, processor_name
-                                ):
-                                    continue
-                                test_name = f"test_{prefix}_{config_class.__name__}_{model_architecture.__name__}_{tokenizer_name}_{processor_name}"
-                                dct[test_name] = gen_test(
-                                    repo_name, model_architecture, tokenizer_name, processor_name
-                                )
-
-        @abstractmethod
-        def inner(self):
-            raise NotImplementedError("Not implemented test")
-
-        # Force these 2 methods to exist
-        dct["test_small_model_pt"] = dct.get("test_small_model_pt", inner)
-        dct["test_small_model_tf"] = dct.get("test_small_model_tf", inner)
-
-        return type.__new__(mcs, name, bases, dct)
-
-
 class CommonPipelineTest(unittest.TestCase):
    @require_torch
    def test_pipeline_iteration(self):