From c38f4e1f1ca68b7180e4986afb7e3a3e62093c8a Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 9 Jun 2022 19:04:42 +0200 Subject: [PATCH] Running a pipeline of `float16`. (#17637) When we're preparing the tensors for CPU for postprocessing, we need to upgrade the `float16` to `float32` since CPUs don't have instructions for `[b]float16`. --- src/transformers/pipelines/base.py | 2 ++ tests/pipelines/test_pipelines_fill_mask.py | 22 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 21311da8e2..1565463e0e 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -869,6 +869,8 @@ class Pipeline(_ScikitCompat): elif isinstance(inputs, tuple): return tuple([self._ensure_tensor_on_device(item, device) for item in inputs]) elif isinstance(inputs, torch.Tensor): + if device == torch.device("cpu") and inputs.dtype in {torch.float16, torch.bfloat16}: + inputs = inputs.float() return inputs.to(device) else: return inputs diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py index ed551bf6f4..d85ab8d7ce 100644 --- a/tests/pipelines/test_pipelines_fill_mask.py +++ b/tests/pipelines/test_pipelines_fill_mask.py @@ -16,7 +16,14 @@ import unittest from transformers import MODEL_FOR_MASKED_LM_MAPPING, TF_MODEL_FOR_MASKED_LM_MAPPING, FillMaskPipeline, pipeline from transformers.pipelines import PipelineException -from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch, slow +from transformers.testing_utils import ( + is_pipeline_test, + nested_simplify, + require_tf, + require_torch, + require_torch_gpu, + slow, +) from .test_pipelines_common import ANY, PipelineTestCaseMeta @@ -130,6 +137,19 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): ], ) + @require_torch_gpu + def test_fp16_casting(self): + pipe = 
pipeline("fill-mask", model="hf-internal-testing/tiny-random-distilbert", device=0, framework="pt") + + # convert model to fp16 + pipe.model.half() + + response = pipe("Paris is the [MASK] of France.") + # We actually don't care about the result, we just want to make sure + # it works, meaning the float16 tensor was cast back to float32 + # for postprocessing. + self.assertIsInstance(response, list) + @slow @require_torch def test_large_model_pt(self):