Pipeline testing - using tiny models on Hub (#20426)

* rework pipeline tests

* run pipeline tests

* fix

* fix

* fix

* revert the changes in get_test_pipeline() parameter list

* fix expected error message

* skip a test

* clean up

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-01-30 10:39:43 +01:00
committed by GitHub
parent a582cfce3c
commit c749bd405e
26 changed files with 286 additions and 274 deletions

View File

@@ -17,26 +17,20 @@ import importlib
import logging
import os
import random
import string
import sys
import tempfile
import unittest
from abc import abstractmethod
from functools import lru_cache
from pathlib import Path
from unittest import skipIf
import datasets
import numpy as np
import requests
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
from requests.exceptions import HTTPError
from transformers import (
FEATURE_EXTRACTOR_MAPPING,
IMAGE_PROCESSOR_MAPPING,
TOKENIZER_MAPPING,
AutoFeatureExtractor,
AutoImageProcessor,
AutoModelForSequenceClassification,
AutoTokenizer,
DistilBertForSequenceClassification,
@@ -71,123 +65,16 @@ from test_module.custom_pipeline import PairClassificationPipeline # noqa E402
logger = logging.getLogger(__name__)
ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS = [
"CamembertConfig",
"IBertConfig",
"LongformerConfig",
"MarkupLMConfig",
"RobertaConfig",
"RobertaPreLayerNormConfig",
"XLMRobertaConfig",
]
PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent.parent, "src/transformers")
def get_checkpoint_from_architecture(architecture):
try:
module = importlib.import_module(architecture.__module__)
except ImportError:
logger.error(f"Ignoring architecture {architecture}")
return
if hasattr(module, "_CHECKPOINT_FOR_DOC"):
return module._CHECKPOINT_FOR_DOC
else:
logger.warning(f"Can't retrieve checkpoint from {architecture.__name__}")
def get_tiny_config_from_class(configuration_class):
if "OpenAIGPT" in configuration_class.__name__:
# This is the only file that is inconsistent with the naming scheme.
# Will rename this file if we decide this is the way to go
return
model_type = configuration_class.model_type
camel_case_model_name = configuration_class.__name__.split("Config")[0]
try:
model_slug = model_type.replace("-", "_")
module = importlib.import_module(f".test_modeling_{model_slug}", package=f"tests.models.{model_slug}")
model_tester_class = getattr(module, f"{camel_case_model_name}ModelTester", None)
except (ImportError, AttributeError):
logger.error(f"No model tester class for {configuration_class.__name__}")
return
if model_tester_class is None:
logger.warning(f"No model tester class for {configuration_class.__name__}")
return
model_tester = model_tester_class(parent=None)
if hasattr(model_tester, "get_pipeline_config"):
config = model_tester.get_pipeline_config()
elif hasattr(model_tester, "get_config"):
config = model_tester.get_config()
else:
config = None
logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")
return config
@lru_cache(maxsize=100)
def get_tiny_tokenizer_from_checkpoint(checkpoint):
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
if tokenizer.vocab_size < 300:
# Wav2Vec2ForCTC for instance
# ByT5Tokenizer
# all are already small enough and have no Fast version that can
# be retrained
return tokenizer
logger.info("Training new from iterator ...")
vocabulary = string.ascii_letters + string.digits + " "
tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False)
logger.info("Trained.")
return tokenizer
def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_extractor_class):
try:
feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
except Exception:
try:
if feature_extractor_class is not None:
feature_extractor = feature_extractor_class()
else:
feature_extractor = None
except Exception:
feature_extractor = None
# Audio Spectogram Transformer specific.
if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
feature_extractor = feature_extractor.__class__(
max_length=tiny_config.max_length, num_mel_bins=tiny_config.num_mel_bins
)
# Speech2TextModel specific.
if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
feature_extractor = feature_extractor.__class__(
feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
)
# TODO remove this, once those have been moved to `image_processor`.
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
return feature_extractor
def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
try:
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
except Exception:
try:
if image_processor_class is not None:
image_processor = image_processor_class()
else:
image_processor = None
except Exception:
image_processor = None
if hasattr(tiny_config, "image_size") and image_processor:
image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
return image_processor
# Dynamically import the Transformers module to grab the attribute classes of the processor form their names.
spec = importlib.util.spec_from_file_location(
"transformers",
os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"),
submodule_search_locations=[PATH_TO_TRANSFORMERS],
)
transformers_module = spec.loader.load_module()
class ANY:
@@ -201,76 +88,171 @@ class ANY:
return f"ANY({', '.join(_type.__name__ for _type in self._types)})"
def is_test_to_skip(test_casse_name, config_class, model_architecture, tokenizer_name, processor_name):
"""Some tests are just not working"""
to_skip = False
if config_class.__name__ == "RoCBertConfig" and test_casse_name in [
"FillMaskPipelineTests",
"FeatureExtractionPipelineTests",
"TextClassificationPipelineTests",
"TokenClassificationPipelineTests",
]:
# Get error: IndexError: index out of range in self.
# `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
# otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
# more usage.
to_skip = True
elif config_class.__name__ in ["LayoutLMv3Config", "LiltConfig"]:
# Get error: ValueError: Words must be of type `List[str]`. Previously, `LayoutLMv3` is not
# used in pipeline tests as it could not find a checkpoint
# TODO: check and fix if possible
to_skip = True
# config/model class we decide to skip
elif config_class.__name__ in ["TapasConfig"]:
# Get error: AssertionError: Table must be of type pd.DataFrame. Also, the tiny model has large
# vocab size as the fast tokenizer could not be converted. Previous, `Tapas` is not used in
# pipeline tests due to the same reason.
# TODO: check and fix if possible
to_skip = True
# TODO: check and fix if possible
if not to_skip and tokenizer_name is not None:
if (
test_casse_name == "QAPipelineTests"
and not tokenizer_name.endswith("Fast")
and config_class.__name__
in [
"FlaubertConfig",
"GPTJConfig",
"LongformerConfig",
"MvpConfig",
"OPTConfig",
"ReformerConfig",
"XLMConfig",
]
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
to_skip = True
elif test_casse_name == "ZeroShotClassificationPipelineTests" and config_class.__name__ in [
"CTRLConfig",
"OpenAIGPTConfig",
]:
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `CTRLConfig` and `OpenAIGPTConfig` were never used in pipeline tests, either because of a missing
# checkpoint or because a tiny config could not be created
to_skip = True
elif test_casse_name == "TranslationPipelineTests" and config_class.__name__ in [
"M2M100Config",
"PLBartConfig",
]:
# Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
# `M2M100Config` and `PLBartConfig` were never used in pipeline tests: cannot create a simple tokenizer
to_skip = True
elif test_casse_name == "TextGenerationPipelineTests" and config_class.__name__ in [
"ProphetNetConfig",
"TransfoXLConfig",
]:
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `TransfoXLConfig` and `ProphetNetConfig` were never used in pipeline tests: cannot create a simple
# tokenizer.
to_skip = True
elif test_casse_name == "FillMaskPipelineTests" and config_class.__name__ in [
"FlaubertConfig",
"XLMConfig",
]:
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `FlaubertConfig` and `TransfoXLConfig` were never used in pipeline tests: cannot create a simple
# tokenizer
to_skip = True
elif test_casse_name == "TextGenerationPipelineTests" and model_architecture.__name__ in [
"TFRoFormerForCausalLM"
]:
# TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
to_skip = True
elif test_casse_name == "QAPipelineTests" and model_architecture.__name__ in ["FNetForQuestionAnswering"]:
# TODO: The change in `base.py` in the PR #21132 (https://github.com/huggingface/transformers/pull/21132)
# fails this test case. Skip for now - a fix for this along with the initial changes in PR #20426 is
# too much. Let `ydshieh` to fix it ASAP once #20426 is merged.
to_skip = True
return to_skip
def validate_test_components(test_case, model, tokenizer, processor):
# TODO: Move this to tiny model creation script
# head-specific (within a model type) necessary changes to the config
# 1. for `BlenderbotForCausalLM`
if model.__class__.__name__ == "BlenderbotForCausalLM":
model.config.encoder_no_repeat_ngram_size = 0
# TODO: Change the tiny model creation script: don't create models with problematic tokenizers
# Avoid `IndexError` in embedding layers
CONFIG_WITHOUT_VOCAB_SIZE = ["CanineConfig"]
if tokenizer is not None:
config_vocab_size = getattr(model.config, "vocab_size", None)
# For CLIP-like models
if config_vocab_size is None and hasattr(model.config, "text_config"):
config_vocab_size = getattr(model.config.text_config, "vocab_size", None)
if config_vocab_size is None and model.config.__class__.__name__ not in CONFIG_WITHOUT_VOCAB_SIZE:
raise ValueError(
"Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`."
)
# TODO: Remove tiny models from the Hub which have problematic tokenizers (but still keep this block)
if config_vocab_size is not None and len(tokenizer) > config_vocab_size:
test_case.skipTest(
f"Ignore {model.__class__.__name__}: `tokenizer` ({tokenizer.__class__.__name__}) has"
f" {len(tokenizer)} tokens which is greater than `config_vocab_size`"
f" ({config_vocab_size}). Something is wrong."
)
class PipelineTestCaseMeta(type):
def __new__(mcs, name, bases, dct):
def gen_test(
ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
):
def gen_test(repo_name, model_architecture, tokenizer_name, processor_name):
@skipIf(
tiny_config is None,
"TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
" file",
)
@skipIf(
checkpoint is None,
"checkpoint does not exist, make sure that you defined a `_CHECKPOINT_FOR_DOC` variable in the"
" modeling file",
tokenizer_name is None and processor_name is None,
f"Ignore {model_architecture.__name__}: no processor class is provided (tokenizer, image processor,"
" feature extractor, etc)",
)
def test(self):
if ModelClass.__name__.endswith("ForCausalLM"):
tiny_config.is_encoder_decoder = False
if hasattr(tiny_config, "encoder_no_repeat_ngram_size"):
# specific for blenderbot which supports both decoder-only
# encoder/decoder but the test config only reflects
# encoder/decoder arch
tiny_config.encoder_no_repeat_ngram_size = 0
if ModelClass.__name__.endswith("WithLMHead"):
tiny_config.is_decoder = True
repo_id = f"hf-internal-testing/{repo_name}"
tokenizer = None
if tokenizer_name is not None:
tokenizer_class = getattr(transformers_module, tokenizer_name)
tokenizer = tokenizer_class.from_pretrained(repo_id)
processor = None
if processor_name is not None:
processor_class = getattr(transformers_module, processor_name)
# If the required packages (like `Pillow`) are not installed, this will fail.
try:
processor = processor_class.from_pretrained(repo_id)
except Exception:
self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
try:
model = ModelClass(tiny_config)
except ImportError as e:
self.skipTest(
f"Cannot run with {tiny_config} as the model requires a library that isn't installed: {e}"
)
model = model_architecture.from_pretrained(repo_id)
except Exception:
self.skipTest(f"Ignore {model_architecture.__name__}: could not load the model from {repo_id}")
# validate
validate_test_components(self, model, tokenizer, processor)
if hasattr(model, "eval"):
model = model.eval()
if tokenizer_class is not None:
try:
tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint)
# XLNet actually defines it as -1.
if model.config.__class__.__name__ in ROBERTA_EMBEDDING_ADJUSMENT_CONFIGS:
tokenizer.model_max_length = model.config.max_position_embeddings - 2
elif (
hasattr(model.config, "max_position_embeddings")
and model.config.max_position_embeddings > 0
):
tokenizer.model_max_length = model.config.max_position_embeddings
# Rust Panic exception are NOT Exception subclass
# Some test tokenizer contain broken vocabs or custom PreTokenizer, so we
# provide some default tokenizer and hope for the best.
except: # noqa: E722
self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
else:
tokenizer = None
feature_extractor = get_tiny_feature_extractor_from_checkpoint(
checkpoint, tiny_config, feature_extractor_class
)
image_processor = get_tiny_image_processor_from_checkpoint(
checkpoint, tiny_config, image_processor_class
)
if tokenizer is None and feature_extractor is None and image_processor:
self.skipTest(
f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
" (PerceiverConfig with no FastTokenizer ?)"
)
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
pipeline, examples = self.get_test_pipeline(model, tokenizer, processor)
if pipeline is None:
# The test can disable itself, but it should be very marginal
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
return
self.skipTest(f"Ignore {model_architecture.__name__}: could not create the pipeline")
self.run_pipeline_test(pipeline, examples)
def run_batch_test(pipeline, examples):
@@ -294,52 +276,45 @@ class PipelineTestCaseMeta(type):
return test
# Download tiny model summary (used to avoid requesting from Hub too many times)
url = "https://huggingface.co/datasets/hf-internal-testing/tiny-random-model-summary/raw/main/processor_classes.json"
tiny_model_summary = requests.get(url).json()
for prefix, key in [("pt", "model_mapping"), ("tf", "tf_model_mapping")]:
mapping = dct.get(key, {})
if mapping:
for configuration, model_architectures in mapping.items():
for config_class, model_architectures in mapping.items():
if not isinstance(model_architectures, tuple):
model_architectures = (model_architectures,)
for model_architecture in model_architectures:
checkpoint = get_checkpoint_from_architecture(model_architecture)
tiny_config = get_tiny_config_from_class(configuration)
tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
feature_extractor_class = FEATURE_EXTRACTOR_MAPPING.get(configuration, None)
feature_extractor_name = (
feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
)
image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
image_processor_name = (
image_processor_class.__name__ if image_processor_class else "noimage_processor"
)
if not tokenizer_classes:
# We need to test even if there are no tokenizers.
tokenizer_classes = [None]
else:
# Remove the non defined tokenizers
# ByT5 and Perceiver are bytes-level and don't define
# FastTokenizer, we can just ignore those.
tokenizer_classes = [
tokenizer_class for tokenizer_class in tokenizer_classes if tokenizer_class is not None
]
model_arch_name = model_architecture.__name__
# Get the canonical name
for _prefix in ["Flax", "TF"]:
if model_arch_name.startswith(_prefix):
model_arch_name = model_arch_name[len(_prefix) :]
break
for tokenizer_class in tokenizer_classes:
if tokenizer_class is not None:
tokenizer_name = tokenizer_class.__name__
else:
tokenizer_name = "notokenizer"
tokenizer_names = []
processor_names = []
if model_arch_name in tiny_model_summary:
tokenizer_names = tiny_model_summary[model_arch_name]["tokenizer_classes"]
processor_names = tiny_model_summary[model_arch_name]["processor_classes"]
# Adding `None` (if empty) so we can generate tests
tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
processor_names = [None] if len(processor_names) == 0 else processor_names
test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"
if tokenizer_class is not None or feature_extractor_class is not None:
repo_name = f"tiny-random-{model_arch_name}"
for tokenizer_name in tokenizer_names:
for processor_name in processor_names:
if is_test_to_skip(
name, config_class, model_architecture, tokenizer_name, processor_name
):
continue
test_name = f"test_{prefix}_{config_class.__name__}_{model_architecture.__name__}_{tokenizer_name}_{processor_name}"
dct[test_name] = gen_test(
model_architecture,
checkpoint,
tiny_config,
tokenizer_class,
feature_extractor_class,
image_processor_class,
repo_name, model_architecture, tokenizer_name, processor_name
)
@abstractmethod