From 795c1444e9dc9b9ef132ef1036fe22a0f4b9e833 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 22 Jul 2021 15:19:35 +0200 Subject: [PATCH] Improving pipeline tests (#12784) * Proposal * Testing pipelines slightly better. - Overall same design - Metaclass to get proper different tests instead of subTest (not well supported by Pytest) - Added ANY meta object to make output checking more readable. - Skipping architectures either without tiny_config or without architecture. * Small fix. * Fixing the tests in case of None value. * Oups. * Rebased with more architectures. * Fixing reformer tests (no override anymore). * Adding more options for model tester config. Co-authored-by: Lysandre --- src/transformers/models/led/modeling_led.py | 8 ++ src/transformers/pipelines/base.py | 24 ++-- tests/test_modeling_mbart.py | 2 +- tests/test_modeling_reformer.py | 5 + tests/test_pipelines_common.py | 115 +++++++++++++++++++- tests/test_pipelines_text_classification.py | 64 +++++++++-- 6 files changed, 200 insertions(+), 18 deletions(-) diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index e926e00875..66c743d809 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -2194,6 +2194,14 @@ class LEDModel(LEDPreTrainedModel): use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict + # Using this like Bart, as LED is derived from it. So far + # No checkpoint on the hub exists that uses that in practice. + # https://github.com/huggingface/transformers/blob/ac3cb660cad283163f7c73cad511124e845ca388/src/transformers/models/bart/modeling_bart.py#L1153 + if decoder_input_ids is None and decoder_inputs_embeds is None: + decoder_input_ids = shift_tokens_right( + input_ids, self.config.pad_token_id, self.config.decoder_start_token_id + ) + if encoder_outputs is None: encoder_outputs = self.encoder( input_ids=input_ids, diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 19d9840fc7..ba40fdbb6c 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -746,13 +746,23 @@ class Pipeline(_ScikitCompat): Parse arguments and tokenize """ # Parse arguments - inputs = self.tokenizer( - inputs, - add_special_tokens=add_special_tokens, - return_tensors=self.framework, - padding=padding, - truncation=truncation, - ) + try: + inputs = self.tokenizer( + inputs, + add_special_tokens=add_special_tokens, + return_tensors=self.framework, + padding=padding, + truncation=truncation, + ) + except ValueError: + # Can be linked to no padding token, if padding_token does not exist we should recover + inputs = self.tokenizer( + inputs, + add_special_tokens=add_special_tokens, + return_tensors=self.framework, + padding=False, + truncation=truncation, + ) return inputs diff --git a/tests/test_modeling_mbart.py b/tests/test_modeling_mbart.py index 5d556bf82e..229eb96e90 100644 --- a/tests/test_modeling_mbart.py +++ b/tests/test_modeling_mbart.py @@ -90,7 +90,7 @@ class MBartModelTester: hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, - max_position_embeddings=20, + max_position_embeddings=100, eos_token_id=2, pad_token_id=1, bos_token_id=0, diff --git a/tests/test_modeling_reformer.py b/tests/test_modeling_reformer.py index ad04643b76..37fc5285c1 100644 --- a/tests/test_modeling_reformer.py +++ b/tests/test_modeling_reformer.py @@ -186,6 +186,11 @@ class ReformerModelTester: hash_seed=self.hash_seed, ) + def get_pipeline_config(self): + config = self.get_config() + config.vocab_size = 100 + return config + def create_and_check_reformer_model(self, config, input_ids, input_mask, choice_labels): model = ReformerModel(config=config) model.to(torch_device) diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index 5468e47427..8044f40093 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -12,15 +12,126 @@ # See the License for the specific language governing permissions and # limitations under the License. +import importlib +import logging +import string +from functools import lru_cache from typing import List, Optional -from unittest import mock +from unittest import mock, skipIf -from transformers import is_tf_available, is_torch_available, pipeline +from transformers import TOKENIZER_MAPPING, AutoTokenizer, is_tf_available, is_torch_available, pipeline from transformers.file_utils import to_py_obj from transformers.pipelines import Pipeline from transformers.testing_utils import _run_slow_tests, is_pipeline_test, require_tf, require_torch, slow +logger = logging.getLogger(__name__) + + +def get_checkpoint_from_architecture(architecture): + module = importlib.import_module(architecture.__module__) + + if hasattr(module, "_CHECKPOINT_FOR_DOC"): + return module._CHECKPOINT_FOR_DOC + else: + logger.warning(f"Can't retrieve checkpoint from {architecture.__name__}") + + +def get_tiny_config_from_class(configuration_class): + if "OpenAIGPT" in configuration_class.__name__: + # This is the only file that is inconsistent with the naming scheme. + # Will rename this file if we decide this is the way to go + return + + model_type = configuration_class.model_type + camel_case_model_name = configuration_class.__name__.split("Config")[0] + + module = importlib.import_module(f".test_modeling_{model_type.replace('-', '_')}", package="tests") + model_tester_class = getattr(module, f"{camel_case_model_name}ModelTester", None) + + if model_tester_class is None: + logger.warning(f"No model tester class for {configuration_class.__name__}") + return + + model_tester = model_tester_class(parent=None) + + if hasattr(model_tester, "get_pipeline_config"): + return model_tester.get_pipeline_config() + elif hasattr(model_tester, "get_config"): + return model_tester.get_config() + else: + logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.") + + +@lru_cache(maxsize=100) +def get_tiny_tokenizer_from_checkpoint(checkpoint): + tokenizer = AutoTokenizer.from_pretrained(checkpoint) + logger.warning("Training new from iterator ...") + vocabulary = string.ascii_letters + string.digits + " " + tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False) + logger.warning("Trained.") + return tokenizer + + +class ANY: + def __init__(self, _type): + self._type = _type + + def __eq__(self, other): + return isinstance(other, self._type) + + def __repr__(self): + return f"ANY({self._type.__name__})" + + +class PipelineTestCaseMeta(type): + def __new__(mcs, name, bases, dct): + def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class): + @skipIf(tiny_config is None, "TinyConfig does not exist") + @skipIf(checkpoint is None, "checkpoint does not exist") + def test(self): + model = ModelClass(tiny_config) + if hasattr(model, "eval"): + model = model.eval() + try: + tokenizer = get_tiny_tokenizer_from_checkpoint(checkpoint) + tokenizer.model_max_length = model.config.max_position_embeddings + # Rust Panic exception are NOT Exception subclass + # Some test tokenizer contain broken vocabs or custom PreTokenizer, so we + # provide some default tokenizer and hope for the best. + except: # noqa: E722 + logger.warning(f"Tokenizer cannot be created from checkpoint {checkpoint}") + tokenizer = get_tiny_tokenizer_from_checkpoint("gpt2") + tokenizer.model_max_length = model.config.max_position_embeddings + self.run_pipeline_test(model, tokenizer) + + return test + + mapping = dct.get("model_mapping", {}) + if mapping: + for configuration, model_architecture in mapping.items(): + checkpoint = get_checkpoint_from_architecture(model_architecture) + tiny_config = get_tiny_config_from_class(configuration) + tokenizer_classes = TOKENIZER_MAPPING.get(configuration, []) + for tokenizer_class in tokenizer_classes: + if tokenizer_class is not None and tokenizer_class.__name__.endswith("Fast"): + test_name = f"test_pt_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_class.__name__}" + dct[test_name] = gen_test(model_architecture, checkpoint, tiny_config, tokenizer_class) + + tf_mapping = dct.get("tf_model_mapping", {}) + if tf_mapping: + for configuration, model_architecture in tf_mapping.items(): + checkpoint = get_checkpoint_from_architecture(model_architecture) + tiny_config = get_tiny_config_from_class(configuration) + tokenizer_classes = TOKENIZER_MAPPING.get(configuration, []) + for tokenizer_class in tokenizer_classes: + if tokenizer_class is not None and tokenizer_class.__name__.endswith("Fast"): + test_name = f"test_tf_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_class.__name__}" + dct[test_name] = gen_test(model_architecture, checkpoint, tiny_config, tokenizer_class) + + return type.__new__(mcs, name, bases, dct) + + VALID_INPUTS = ["A simple string", ["list of strings"]] diff --git a/tests/test_pipelines_text_classification.py b/tests/test_pipelines_text_classification.py index 7db8a24116..d77286f338 100644 --- a/tests/test_pipelines_text_classification.py +++ b/tests/test_pipelines_text_classification.py @@ -14,13 +14,61 @@ import unittest -from .test_pipelines_common import MonoInputPipelineCommonMixin +from transformers import ( + MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, + TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, + TextClassificationPipeline, + pipeline, +) +from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow + +from .test_pipelines_common import ANY, PipelineTestCaseMeta -class TextClassificationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): - pipeline_task = "sentiment-analysis" - small_models = [ - "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english" - ] # Default model - Models tested without the @slow decorator - large_models = [None] # Models tested with the @slow decorator - mandatory_keys = {"label", "score"} # Keys which should be in the output +@is_pipeline_test +class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): + model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING + tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING + + @slow + @require_torch + def test_pt_bert(self): + text_classifier = pipeline("text-classification") + + outputs = text_classifier("This is great !") + self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}]) + outputs = text_classifier("This is bad !") + self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}]) + outputs = text_classifier("Birds are a type of animal") + self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) + + @slow + @require_tf + def test_tf_bert(self): + text_classifier = pipeline("text-classification", framework="tf") + + outputs = text_classifier("This is great !") + self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}]) + outputs = text_classifier("This is bad !") + self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}]) + outputs = text_classifier("Birds are a type of animal") + self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) + + def run_pipeline_test(self, model, tokenizer): + text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer) + + # Small inputs because BartTokenizer tiny has maximum position embeddings = 22 + valid_inputs = "HuggingFace is in" + outputs = text_classifier(valid_inputs) + + self.assertEqual(nested_simplify(outputs), [{"label": ANY(str), "score": ANY(float)}]) + self.assertTrue(outputs[0]["label"] in model.config.id2label.values()) + + valid_inputs = ["HuggingFace is in ", "Paris is in France"] + outputs = text_classifier(valid_inputs) + self.assertEqual( + nested_simplify(outputs), + [{"label": ANY(str), "score": ANY(float)}, {"label": ANY(str), "score": ANY(float)}], + ) + self.assertTrue(outputs[0]["label"] in model.config.id2label.values()) + self.assertTrue(outputs[1]["label"] in model.config.id2label.values())