Moving text-generation pipeline to new testing framework. (#13285)
* Moving `text-generation` pipeline to new testing framework.
* Keep `check_model_type` but log instead of raising an exception.
* warning -> error.
This commit is contained in:
@@ -735,10 +735,8 @@ class Pipeline(_ScikitCompat):
|
|||||||
supported_models_names.append(model.__name__)
|
supported_models_names.append(model.__name__)
|
||||||
supported_models = supported_models_names
|
supported_models = supported_models_names
|
||||||
if self.model.__class__.__name__ not in supported_models:
|
if self.model.__class__.__name__ not in supported_models:
|
||||||
raise PipelineException(
|
logger.error(
|
||||||
self.task,
|
f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are {supported_models}."
|
||||||
self.model.base_model_prefix,
|
|
||||||
f"The model '{self.model.__class__.__name__}' is not supported for {self.task}. Supported models are {supported_models}",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _parse_and_tokenize(
|
def _parse_and_tokenize(
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING
|
||||||
|
|
||||||
from ..file_utils import add_end_docstrings
|
from ..file_utils import add_end_docstrings
|
||||||
from .base import PIPELINE_INIT_ARGS, Pipeline
|
from .base import PIPELINE_INIT_ARGS, Pipeline
|
||||||
|
|
||||||
@@ -30,25 +32,12 @@ class TextGenerationPipeline(Pipeline):
|
|||||||
begging for his blessing. <eod> </s> <eos>
|
begging for his blessing. <eod> </s> <eos>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
ALLOWED_MODELS = [
|
|
||||||
"XLNetLMHeadModel",
|
|
||||||
"TransfoXLLMHeadModel",
|
|
||||||
"ReformerModelWithLMHead",
|
|
||||||
"GPT2LMHeadModel",
|
|
||||||
"GPTNeoForCausalLM",
|
|
||||||
"OpenAIGPTLMHeadModel",
|
|
||||||
"CTRLLMHeadModel",
|
|
||||||
"TFXLNetLMHeadModel",
|
|
||||||
"TFTransfoXLLMHeadModel",
|
|
||||||
"TFGPT2LMHeadModel",
|
|
||||||
"TFOpenAIGPTLMHeadModel",
|
|
||||||
"TFCTRLLMHeadModel",
|
|
||||||
]
|
|
||||||
|
|
||||||
def __init__(self, *args, return_full_text=True, **kwargs):
|
def __init__(self, *args, return_full_text=True, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
self.check_model_type(
|
||||||
|
TF_MODEL_FOR_CAUSAL_LM_MAPPING if self.framework == "tf" else MODEL_FOR_CAUSAL_LM_MAPPING
|
||||||
|
)
|
||||||
|
|
||||||
self.check_model_type(self.ALLOWED_MODELS)
|
|
||||||
self.return_full_text = return_full_text
|
self.return_full_text = return_full_text
|
||||||
|
|
||||||
# overriding _parse_and_tokenize to allow for unusual language-modeling tokenizer arguments
|
# overriding _parse_and_tokenize to allow for unusual language-modeling tokenizer arguments
|
||||||
@@ -124,6 +113,9 @@ class TextGenerationPipeline(Pipeline):
|
|||||||
prefix_length = prefix_inputs["input_ids"].shape[-1]
|
prefix_length = prefix_inputs["input_ids"].shape[-1]
|
||||||
if generate_kwargs.get("max_length", None) is not None:
|
if generate_kwargs.get("max_length", None) is not None:
|
||||||
generate_kwargs["max_length"] += prefix_length
|
generate_kwargs["max_length"] += prefix_length
|
||||||
|
else:
|
||||||
|
generate_kwargs["max_length"] = self.model.config.max_length + prefix_length
|
||||||
|
|
||||||
if generate_kwargs.get("min_length", None) is not None:
|
if generate_kwargs.get("min_length", None) is not None:
|
||||||
generate_kwargs["min_length"] += prefix_length
|
generate_kwargs["min_length"] += prefix_length
|
||||||
|
|
||||||
|
|||||||
@@ -14,49 +14,95 @@
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from transformers import pipeline
|
from transformers import MODEL_FOR_CAUSAL_LM_MAPPING, TF_MODEL_FOR_CAUSAL_LM_MAPPING, TextGenerationPipeline, pipeline
|
||||||
from transformers.testing_utils import require_torch
|
from transformers.testing_utils import is_pipeline_test, require_tf, require_torch
|
||||||
|
|
||||||
from .test_pipelines_common import MonoInputPipelineCommonMixin
|
from .test_pipelines_common import ANY, PipelineTestCaseMeta
|
||||||
|
|
||||||
|
|
||||||
class TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
|
@is_pipeline_test
|
||||||
pipeline_task = "text-generation"
|
class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||||
pipeline_running_kwargs = {"prefix": "This is "}
|
model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
|
||||||
small_models = ["sshleifer/tiny-ctrl"] # Models tested without the @slow decorator
|
tf_model_mapping = TF_MODEL_FOR_CAUSAL_LM_MAPPING
|
||||||
large_models = [] # Models tested with the @slow decorator
|
|
||||||
|
|
||||||
def test_simple_generation(self):
|
|
||||||
text_generator = pipeline(task="text-generation", model=self.small_models[0])
|
|
||||||
# text-generation is non-deterministic by nature, we can't fully test the output
|
|
||||||
|
|
||||||
outputs = text_generator("This is a test")
|
|
||||||
|
|
||||||
self.assertEqual(len(outputs), 1)
|
|
||||||
self.assertEqual(list(outputs[0].keys()), ["generated_text"])
|
|
||||||
self.assertEqual(type(outputs[0]["generated_text"]), str)
|
|
||||||
|
|
||||||
outputs = text_generator(["This is a test", "This is a second test"])
|
|
||||||
self.assertEqual(len(outputs[0]), 1)
|
|
||||||
self.assertEqual(list(outputs[0][0].keys()), ["generated_text"])
|
|
||||||
self.assertEqual(type(outputs[0][0]["generated_text"]), str)
|
|
||||||
self.assertEqual(list(outputs[1][0].keys()), ["generated_text"])
|
|
||||||
self.assertEqual(type(outputs[1][0]["generated_text"]), str)
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_generation_output_style(self):
|
def test_small_model_pt(self):
|
||||||
text_generator = pipeline(task="text-generation", model=self.small_models[0])
|
text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="pt")
|
||||||
# text-generation is non-deterministic by nature, we can't fully test the output
|
# Using `do_sample=False` to force deterministic output
|
||||||
|
outputs = text_generator("This is a test", do_sample=False)
|
||||||
|
self.assertEqual(
|
||||||
|
outputs,
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = text_generator(["This is a test", "This is a second test"])
|
||||||
|
self.assertEqual(
|
||||||
|
outputs,
|
||||||
|
[
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a test ☃ ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a second test ☃ segmental segmental segmental 议议eski eski flutter flutter Lacy oscope. oscope. FiliFili@@"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
@require_tf
|
||||||
|
def test_small_model_tf(self):
|
||||||
|
text_generator = pipeline(task="text-generation", model="sshleifer/tiny-ctrl", framework="tf")
|
||||||
|
|
||||||
|
# Using `do_sample=False` to force deterministic output
|
||||||
|
outputs = text_generator("This is a test", do_sample=False)
|
||||||
|
self.assertEqual(
|
||||||
|
outputs,
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
outputs = text_generator(["This is a test", "This is a second test"], do_sample=False)
|
||||||
|
self.assertEqual(
|
||||||
|
outputs,
|
||||||
|
[
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a test FeyFeyFey(Croatis.), s.), Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"generated_text": "This is a second test Chieftain Chieftain prefecture prefecture prefecture Cannes Cannes Cannes 閲閲Cannes Cannes Cannes 攵 please,"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
def run_pipeline_test(self, model, tokenizer, feature_extractor):
|
||||||
|
text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
|
||||||
outputs = text_generator("This is a test")
|
outputs = text_generator("This is a test")
|
||||||
self.assertIn("This is a test", outputs[0]["generated_text"])
|
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
|
||||||
|
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
|
||||||
|
|
||||||
outputs = text_generator("This is a test", return_full_text=False)
|
outputs = text_generator("This is a test", return_full_text=False)
|
||||||
|
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
|
||||||
self.assertNotIn("This is a test", outputs[0]["generated_text"])
|
self.assertNotIn("This is a test", outputs[0]["generated_text"])
|
||||||
|
|
||||||
text_generator = pipeline(task="text-generation", model=self.small_models[0], return_full_text=False)
|
text_generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer, return_full_text=False)
|
||||||
outputs = text_generator("This is a test")
|
outputs = text_generator("This is a test")
|
||||||
|
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
|
||||||
self.assertNotIn("This is a test", outputs[0]["generated_text"])
|
self.assertNotIn("This is a test", outputs[0]["generated_text"])
|
||||||
|
|
||||||
outputs = text_generator("This is a test", return_full_text=True)
|
outputs = text_generator("This is a test", return_full_text=True)
|
||||||
self.assertIn("This is a test", outputs[0]["generated_text"])
|
self.assertEqual(outputs, [{"generated_text": ANY(str)}])
|
||||||
|
self.assertTrue(outputs[0]["generated_text"].startswith("This is a test"))
|
||||||
|
|||||||
Reference in New Issue
Block a user