🔥Rework pipeline testing by removing PipelineTestCaseMeta 🚀 (#21516)

* Add PipelineTesterMixin

* remove class PipelineTestCaseMeta

* move validate_test_components

* Add for ViT

* Add to SPECIAL_MODULE_TO_TEST_MAP

* style and quality

* Add feature-extraction

* update

* raise instead of skip

* add tiny_model_summary.json

* more explicit

* skip tasks not in mapping

* add availability check

* Add Copyright

* A way to disable irrelevant tests

* update with main

* remove disable_irrelevant_tests

* skip tests

* better skip message

* better skip message

* Add all pipeline task tests

* revert

* Import PipelineTesterMixin

* subclass test classes with PipelineTesterMixin

* Add pipeline_model_mapping

* Fix import after adding pipeline_model_mapping

* Fix style and quality after adding pipeline_model_mapping

* Fix one more import after adding pipeline_model_mapping

* Fix style and quality after adding pipeline_model_mapping

* Fix test issues

* Fix import requirements

* Fix mapping for MobileViTModelTest

* Update

* Better skip message

* pipeline_model_mapping could not be None

* Remove some PipelineTesterMixin

* Fix typo

* revert tests_fetcher.py

* update

* rename

* revert

* Remove PipelineTestCaseMeta from ZeroShotAudioClassificationPipelineTests

* style and quality

* test fetcher for all pipeline/model tests

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-02-28 19:40:57 +01:00
committed by GitHub
parent 4cb5ffa93d
commit 871c31a6f1
243 changed files with 5871 additions and 523 deletions

View File

@@ -12,20 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import logging
import os
import random
import sys
import tempfile
import unittest
from abc import abstractmethod
from pathlib import Path
from unittest import skipIf
import datasets
import numpy as np
import requests
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
from requests.exceptions import HTTPError
@@ -82,255 +77,6 @@ class ANY:
return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
def is_test_to_skip(test_casse_name, config_class, model_architecture, tokenizer_name, processor_name):
    """Tell whether a (test case, config, architecture, tokenizer) combination is known to be broken.

    Args:
        test_casse_name: Name of the pipeline test class (e.g. `"QAPipelineTests"`).
        config_class: Configuration class; only its `__name__` is inspected.
        model_architecture: Model class; only its `__name__` is inspected.
        tokenizer_name: Name of the tokenizer class, or `None` when there is none.
        processor_name: Name of the processor class. Not used by any rule below.

    Returns:
        `True` if the combination matches one of the known-failing rules, `False` otherwise.
    """
    config_name = config_class.__name__

    # ------------------------------------------------------------------
    # Rules that apply whether or not a tokenizer is available
    # ------------------------------------------------------------------

    # Get error: IndexError: index out of range in self.
    # `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
    # otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
    # more usage.
    rocbert_failing_tests = (
        "FillMaskPipelineTests",
        "FeatureExtractionPipelineTests",
        "TextClassificationPipelineTests",
        "TokenClassificationPipelineTests",
    )
    if config_name == "RoCBertConfig" and test_casse_name in rocbert_failing_tests:
        return True

    # Get error: ValueError: Words must be of type `List[str]`. Previously, `LayoutLMv3` is not
    # used in pipeline tests as it could not find a checkpoint.
    # TODO: check and fix if possible
    if config_name in ("LayoutLMv3Config", "LiltConfig"):
        return True

    # Config/model class we decide to skip.
    # Get error: AssertionError: Table must be of type pd.DataFrame. Also, the tiny model has large
    # vocab size as the fast tokenizer could not be converted. Previously, `Tapas` is not used in
    # pipeline tests due to the same reason.
    # TODO: check and fix if possible
    if config_name in ("TapasConfig",):
        return True

    # ------------------------------------------------------------------
    # Every remaining rule is only considered when a tokenizer name is given
    # TODO: check and fix if possible
    # ------------------------------------------------------------------
    if tokenizer_name is None:
        return False

    # `QAPipelineTests` fails for a few models when the slower tokenizer are used.
    # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
    # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
    qa_slow_tokenizer_failures = (
        "FlaubertConfig",
        "GPTJConfig",
        "LongformerConfig",
        "MvpConfig",
        "OPTConfig",
        "ReformerConfig",
        "XLMConfig",
    )
    if (
        test_casse_name == "QAPipelineTests"
        and not tokenizer_name.endswith("Fast")
        and config_name in qa_slow_tokenizer_failures
    ):
        return True

    # Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
    # `CTRLConfig` and `OpenAIGPTConfig` were never used in pipeline tests, either because of a missing
    # checkpoint or because a tiny config could not be created.
    if test_casse_name == "ZeroShotClassificationPipelineTests" and config_name in (
        "CTRLConfig",
        "OpenAIGPTConfig",
    ):
        return True

    # Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
    # `M2M100Config` and `PLBartConfig` were never used in pipeline tests: cannot create a simple tokenizer.
    if test_casse_name == "TranslationPipelineTests" and config_name in ("M2M100Config", "PLBartConfig"):
        return True

    # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
    # `TransfoXLConfig` and `ProphetNetConfig` were never used in pipeline tests: cannot create a simple
    # tokenizer.
    if test_casse_name == "TextGenerationPipelineTests" and config_name in (
        "ProphetNetConfig",
        "TransfoXLConfig",
    ):
        return True

    # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
    # `FlaubertConfig` and `XLMConfig` were never used in pipeline tests: cannot create a simple tokenizer.
    if test_casse_name == "FillMaskPipelineTests" and config_name in ("FlaubertConfig", "XLMConfig"):
        return True

    # TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
    if test_casse_name == "TextGenerationPipelineTests" and model_architecture.__name__ in (
        "TFRoFormerForCausalLM",
    ):
        return True

    # TODO: The change in `base.py` in the PR #21132 (https://github.com/huggingface/transformers/pull/21132)
    # fails this test case. Skip for now - a fix for this along with the initial changes in PR #20426 is
    # too much. Let `ydshieh` to fix it ASAP once #20426 is merged.
    if test_casse_name == "QAPipelineTests" and model_architecture.__name__ in ("FNetForQuestionAnswering",):
        return True

    # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
    # config. With new tiny model creation, it is available, but we need to fix the failed tests.
    layoutlmv2_failing_tests = (
        "QAPipelineTests",
        "TextClassificationPipelineTests",
        "TokenClassificationPipelineTests",
        "ZeroShotClassificationPipelineTests",
    )
    if config_name == "LayoutLMv2Config" and test_casse_name in layoutlmv2_failing_tests:
        return True

    # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
    return test_casse_name == "DocumentQuestionAnsweringPipelineTests" and not tokenizer_name.endswith("Fast")
def validate_test_components(test_case, model, tokenizer, processor):
    """Patch or reject a (model, tokenizer) pair before it is handed to a pipeline test.

    Args:
        test_case: Object exposing `skipTest(message)`; called when the tokenizer is larger than the model vocab.
        model: Loaded model; its `config` attribute is read and, for `BlenderbotForCausalLM`, modified.
        tokenizer: Tokenizer supporting `len()`, or `None` (in which case no vocabulary check is done).
        processor: Not used by this function.

    Raises:
        ValueError: If a tokenizer is given but no `vocab_size` can be found on the model config (or its
            `text_config`), and the config class is not listed as legitimately lacking one.
    """
    # TODO: Move this to tiny model creation script
    # Head-specific (within a model type) necessary changes to the config:
    # 1. for `BlenderbotForCausalLM`
    if model.__class__.__name__ == "BlenderbotForCausalLM":
        model.config.encoder_no_repeat_ngram_size = 0

    # Nothing to compare against without a tokenizer.
    if tokenizer is None:
        return

    # TODO: Change the tiny model creation script: don't create models with problematic tokenizers
    # The check below avoids `IndexError` in embedding layers.
    configs_without_vocab_size = ("CanineConfig",)

    model_config = model.config
    vocab_size = getattr(model_config, "vocab_size", None)
    # For CLIP-like models, the vocabulary size lives on the nested text config
    if vocab_size is None and hasattr(model_config, "text_config"):
        vocab_size = getattr(model_config.text_config, "vocab_size", None)

    if vocab_size is None:
        if model_config.__class__.__name__ not in configs_without_vocab_size:
            raise ValueError(
                "Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`."
            )
        # Config is known to have no `vocab_size`: nothing more to validate.
        return

    # TODO: Remove tiny models from the Hub which have problematic tokenizers (but still keep this block)
    num_tokens = len(tokenizer)
    if num_tokens > vocab_size:
        model_name = model.__class__.__name__
        tokenizer_class_name = tokenizer.__class__.__name__
        test_case.skipTest(
            f"Ignore {model_name}: `tokenizer` ({tokenizer_class_name}) has"
            f" {num_tokens} tokens which is greater than `config_vocab_size`"
            f" ({vocab_size}). Something is wrong."
        )
class PipelineTestCaseMeta(type):
    """Metaclass that injects one generated test method per (model, tokenizer, processor) combination.

    When a class using this metaclass is created, its namespace is searched for the `model_mapping` and
    `tf_model_mapping` entries. For every architecture found there, the tokenizer/processor class names are
    looked up in the tiny model summary and a `test_{pt|tf}_...` method is added to the class namespace,
    unless `is_test_to_skip` rejects the combination. `test_small_model_pt` and `test_small_model_tf` are
    also forced to exist (defaulting to a method raising `NotImplementedError`).
    """

    # Summary of the tokenizer/processor classes available for each tiny model on the Hub.
    _TINY_MODEL_SUMMARY_URL = (
        "https://huggingface.co/datasets/hf-internal-testing/tiny-random-model-summary/raw/main/processor_classes.json"
    )
    # Seconds to wait for the Hub before failing, so that a stalled connection cannot hang test collection.
    _TINY_MODEL_SUMMARY_TIMEOUT = 30
    # Downloaded summary, shared by every class created through this metaclass.
    _tiny_model_summary = None

    @classmethod
    def _get_tiny_model_summary(mcs):
        """Return the tiny model summary, downloading it only the first time it is needed."""
        if PipelineTestCaseMeta._tiny_model_summary is None:
            response = requests.get(
                PipelineTestCaseMeta._TINY_MODEL_SUMMARY_URL,
                timeout=PipelineTestCaseMeta._TINY_MODEL_SUMMARY_TIMEOUT,
            )
            PipelineTestCaseMeta._tiny_model_summary = response.json()
        return PipelineTestCaseMeta._tiny_model_summary

    def __new__(mcs, name, bases, dct):
        """Create the test class after adding the generated test methods to its namespace `dct`."""

        def gen_test(repo_name, model_architecture, tokenizer_name, processor_name):
            """Build the test method for one (architecture, tokenizer, processor) combination.

            Going through this factory gives each generated test its own binding of the arguments.
            """

            @skipIf(
                tokenizer_name is None and processor_name is None,
                f"Ignore {model_architecture.__name__}: no processor class is provided (tokenizer, image processor,"
                " feature extractor, etc)",
            )
            def test(self):
                repo_id = f"hf-internal-testing/{repo_name}"

                tokenizer = None
                if tokenizer_name is not None:
                    tokenizer_class = getattr(transformers_module, tokenizer_name)
                    tokenizer = tokenizer_class.from_pretrained(repo_id)

                processor = None
                if processor_name is not None:
                    processor_class = getattr(transformers_module, processor_name)
                    # If the required packages (like `Pillow`) are not installed, this will fail.
                    try:
                        processor = processor_class.from_pretrained(repo_id)
                    except Exception:
                        # Report the processor (not the model) as the component that failed to load.
                        self.skipTest(
                            f"Ignore {model_architecture.__name__}: could not load the processor from {repo_id}"
                        )

                try:
                    model = model_architecture.from_pretrained(repo_id)
                except Exception:
                    self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")

                # validate
                validate_test_components(self, model, tokenizer, processor)

                if hasattr(model, "eval"):
                    model = model.eval()

                pipeline, examples = self.get_test_pipeline(model, tokenizer, processor)
                if pipeline is None:
                    # The test can disable itself, but it should be very marginal
                    # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
                    self.skipTest(f"Ignore {model_architecture.__name__}: could not create the pipeline")
                self.run_pipeline_test(pipeline, examples)

                def run_batch_test(pipeline, examples):
                    """Check that iterating the pipeline in batches yields one output per input."""
                    if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
                        return  # No batching for this and it's OK

                    # 10 examples with batch size 4 means there needs to be a unfinished batch
                    # which is important for the unbatcher
                    def data(n):
                        for _ in range(n):
                            # Need to copy because Conversation object is mutated
                            yield copy.deepcopy(random.choice(examples))

                    out = list(pipeline(data(10), batch_size=4))
                    self.assertEqual(len(out), 10)

                run_batch_test(pipeline, examples)

            return test

        # Tiny model summary (cached on the metaclass to avoid requesting from the Hub too many times)
        tiny_model_summary = mcs._get_tiny_model_summary()

        for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
            mapping = dct.get(key, {})
            if not mapping:
                continue

            for config_class, model_architectures in mapping.items():
                if not isinstance(model_architectures, tuple):
                    model_architectures = (model_architectures,)

                for model_architecture in model_architectures:
                    model_arch_name = model_architecture.__name__

                    # Get the canonical name
                    for _prefix in ["Flax", "TF"]:
                        if model_arch_name.startswith(_prefix):
                            model_arch_name = model_arch_name[len(_prefix) :]
                            break

                    tokenizer_names = []
                    processor_names = []
                    if model_arch_name in tiny_model_summary:
                        tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
                        processor_names = tiny_model_summary[model_arch_name]["processor_classes"]

                    # Adding `None` (if empty) so we can generate tests
                    tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
                    processor_names = [None] if len(processor_names) == 0 else processor_names

                    repo_name = f"tiny-random-{model_arch_name}"
                    for tokenizer_name in tokenizer_names:
                        for processor_name in processor_names:
                            if is_test_to_skip(
                                name, config_class, model_architecture, tokenizer_name, processor_name
                            ):
                                continue
                            test_name = f"test_{prefix}_{config_class.__name__}_{model_architecture.__name__}_{tokenizer_name}_{processor_name}"
                            dct[test_name] = gen_test(
                                repo_name, model_architecture, tokenizer_name, processor_name
                            )

        @abstractmethod
        def inner(self):
            raise NotImplementedError("Not implemented test")

        # Force these 2 methods to exist
        dct["test_small_model_pt"] = dct.get("test_small_model_pt", inner)
        dct["test_small_model_tf"] = dct.get("test_small_model_tf", inner)

        return type.__new__(mcs, name, bases, dct)
class CommonPipelineTest(unittest.TestCase):
@require_torch
def test_pipeline_iteration(self):