[Pipelines] Add revision tag to all default pipelines (#17667)
* trigger test failure * upload revision poc * Update src/transformers/pipelines/base.py Co-authored-by: Julien Chaumond <julien@huggingface.co> * up * add test * correct some stuff * Update src/transformers/pipelines/__init__.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * correct require flag Co-authored-by: Julien Chaumond <julien@huggingface.co> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
4f8361afe7
commit
e4d2588573
@@ -22,6 +22,8 @@ from abc import abstractmethod
|
||||
from functools import lru_cache
|
||||
from unittest import skipIf
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers import (
|
||||
FEATURE_EXTRACTOR_MAPPING,
|
||||
TOKENIZER_MAPPING,
|
||||
@@ -35,7 +37,15 @@ from transformers import (
|
||||
)
|
||||
from transformers.pipelines import get_task
|
||||
from transformers.pipelines.base import _pad
|
||||
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
|
||||
from transformers.testing_utils import (
|
||||
is_pipeline_test,
|
||||
nested_simplify,
|
||||
require_scatter,
|
||||
require_tensorflow_probability,
|
||||
require_tf,
|
||||
require_torch,
|
||||
slow,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -461,8 +471,8 @@ class PipelinePadTest(unittest.TestCase):
|
||||
|
||||
|
||||
@is_pipeline_test
|
||||
@require_torch
|
||||
class PipelineUtilsTest(unittest.TestCase):
|
||||
@require_torch
|
||||
def test_pipeline_dataset(self):
|
||||
from transformers.pipelines.pt_utils import PipelineDataset
|
||||
|
||||
@@ -476,6 +486,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
outputs = [dataset[i] for i in range(4)]
|
||||
self.assertEqual(outputs, [2, 3, 4, 5])
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_iterator(self):
|
||||
from transformers.pipelines.pt_utils import PipelineIterator
|
||||
|
||||
@@ -490,6 +501,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
outputs = [item for item in dataset]
|
||||
self.assertEqual(outputs, [2, 3, 4, 5])
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_iterator_no_len(self):
|
||||
from transformers.pipelines.pt_utils import PipelineIterator
|
||||
|
||||
@@ -507,6 +519,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
outputs = [item for item in dataset]
|
||||
self.assertEqual(outputs, [2, 3, 4, 5])
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_batch_unbatch_iterator(self):
|
||||
from transformers.pipelines.pt_utils import PipelineIterator
|
||||
|
||||
@@ -520,6 +533,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
outputs = [item for item in dataset]
|
||||
self.assertEqual(outputs, [{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}])
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_batch_unbatch_iterator_tensors(self):
|
||||
import torch
|
||||
|
||||
@@ -537,6 +551,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
nested_simplify(outputs), [{"id": [[12, 22]]}, {"id": [[2, 3]]}, {"id": [[2, 4]]}, {"id": [[5]]}]
|
||||
)
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_chunk_iterator(self):
|
||||
from transformers.pipelines.pt_utils import PipelineChunkIterator
|
||||
|
||||
@@ -552,6 +567,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(outputs, [0, 1, 0, 1, 2])
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_pack_iterator(self):
|
||||
from transformers.pipelines.pt_utils import PipelinePackIterator
|
||||
|
||||
@@ -584,6 +600,7 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
],
|
||||
)
|
||||
|
||||
@require_torch
|
||||
def test_pipeline_pack_unbatch_iterator(self):
|
||||
from transformers.pipelines.pt_utils import PipelinePackIterator
|
||||
|
||||
@@ -607,3 +624,125 @@ class PipelineUtilsTest(unittest.TestCase):
|
||||
|
||||
outputs = [item for item in dataset]
|
||||
self.assertEqual(outputs, [[{"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}]])
|
||||
|
||||
@slow
|
||||
@require_torch
|
||||
def test_load_default_pipelines_pt(self):
|
||||
import torch
|
||||
|
||||
from transformers.pipelines import SUPPORTED_TASKS
|
||||
|
||||
set_seed_fn = lambda: torch.manual_seed(0) # noqa: E731
|
||||
for task in SUPPORTED_TASKS.keys():
|
||||
if task == "table-question-answering":
|
||||
# test table in seperate test due to more dependencies
|
||||
continue
|
||||
|
||||
self.check_default_pipeline(task, "pt", set_seed_fn, self.check_models_equal_pt)
|
||||
|
||||
@slow
|
||||
@require_tf
|
||||
def test_load_default_pipelines_tf(self):
|
||||
import tensorflow as tf
|
||||
|
||||
from transformers.pipelines import SUPPORTED_TASKS
|
||||
|
||||
set_seed_fn = lambda: tf.random.set_seed(0) # noqa: E731
|
||||
for task in SUPPORTED_TASKS.keys():
|
||||
if task == "table-question-answering":
|
||||
# test table in seperate test due to more dependencies
|
||||
continue
|
||||
|
||||
self.check_default_pipeline(task, "tf", set_seed_fn, self.check_models_equal_tf)
|
||||
|
||||
@slow
|
||||
@require_torch
|
||||
@require_scatter
|
||||
def test_load_default_pipelines_pt_table_qa(self):
|
||||
import torch
|
||||
|
||||
set_seed_fn = lambda: torch.manual_seed(0) # noqa: E731
|
||||
self.check_default_pipeline("table-question-answering", "pt", set_seed_fn, self.check_models_equal_pt)
|
||||
|
||||
@slow
|
||||
@require_tf
|
||||
@require_tensorflow_probability
|
||||
def test_load_default_pipelines_tf_table_qa(self):
|
||||
import tensorflow as tf
|
||||
|
||||
set_seed_fn = lambda: tf.random.set_seed(0) # noqa: E731
|
||||
self.check_default_pipeline("table-question-answering", "tf", set_seed_fn, self.check_models_equal_tf)
|
||||
|
||||
def check_default_pipeline(self, task, framework, set_seed_fn, check_models_equal_fn):
|
||||
from transformers.pipelines import SUPPORTED_TASKS, pipeline
|
||||
|
||||
task_dict = SUPPORTED_TASKS[task]
|
||||
# test to compare pipeline to manually loading the respective model
|
||||
model = None
|
||||
relevant_auto_classes = task_dict[framework]
|
||||
|
||||
if len(relevant_auto_classes) == 0:
|
||||
# task has no default
|
||||
logger.debug(f"{task} in {framework} has no default")
|
||||
return
|
||||
|
||||
# by default use first class
|
||||
auto_model_cls = relevant_auto_classes[0]
|
||||
|
||||
# retrieve correct model ids
|
||||
if task == "translation":
|
||||
# special case for translation pipeline which has multiple languages
|
||||
model_ids = []
|
||||
revisions = []
|
||||
tasks = []
|
||||
for translation_pair in task_dict["default"].keys():
|
||||
model_id, revision = task_dict["default"][translation_pair]["model"][framework]
|
||||
|
||||
model_ids.append(model_id)
|
||||
revisions.append(revision)
|
||||
tasks.append(task + f"_{'_to_'.join(translation_pair)}")
|
||||
else:
|
||||
# normal case - non-translation pipeline
|
||||
model_id, revision = task_dict["default"]["model"][framework]
|
||||
|
||||
model_ids = [model_id]
|
||||
revisions = [revision]
|
||||
tasks = [task]
|
||||
|
||||
# check for equality
|
||||
for model_id, revision, task in zip(model_ids, revisions, tasks):
|
||||
# load default model
|
||||
try:
|
||||
set_seed_fn()
|
||||
model = auto_model_cls.from_pretrained(model_id, revision=revision)
|
||||
except ValueError:
|
||||
# first auto class is possible not compatible with model, go to next model class
|
||||
auto_model_cls = relevant_auto_classes[1]
|
||||
set_seed_fn()
|
||||
model = auto_model_cls.from_pretrained(model_id, revision=revision)
|
||||
|
||||
# load default pipeline
|
||||
set_seed_fn()
|
||||
default_pipeline = pipeline(task, framework=framework)
|
||||
|
||||
# compare pipeline model with default model
|
||||
models_are_equal = check_models_equal_fn(default_pipeline.model, model)
|
||||
self.assertTrue(models_are_equal, f"{task} model doesn't match pipeline.")
|
||||
|
||||
logger.debug(f"{task} in {framework} succeeded with {model_id}.")
|
||||
|
||||
def check_models_equal_pt(self, model1, model2):
|
||||
models_are_equal = True
|
||||
for model1_p, model2_p in zip(model1.parameters(), model2.parameters()):
|
||||
if model1_p.data.ne(model2_p.data).sum() > 0:
|
||||
models_are_equal = False
|
||||
|
||||
return models_are_equal
|
||||
|
||||
def check_models_equal_tf(self, model1, model2):
|
||||
models_are_equal = True
|
||||
for model1_p, model2_p in zip(model1.weights, model2.weights):
|
||||
if np.abs(model1_p.numpy() - model2_p.numpy()).sum() > 1e-5:
|
||||
models_are_equal = False
|
||||
|
||||
return models_are_equal
|
||||
|
||||
Reference in New Issue
Block a user