add return_tensor parameter for feature extraction (#19257)

* add return_tensors parameter for feature_extraction  w/ test

add return_tensor parameter for feature extraction

Revert "Merge branch 'feature-extraction-return-tensor' of https://github.com/ajsanjoaquin/transformers into feature-extraction-return-tensor"

This reverts commit d559da743b87914e111a84a98ba6dbb70d08ad88, reversing
changes made to bbef89278650c04c090beb65637a8e9572dba222.

* call parameter directly

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* Fixup.

* Update src/transformers/pipelines/feature_extraction.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Ayrton San Joaquin
2022-10-17 17:17:26 +02:00
committed by GitHub
parent 59e29be363
commit 35bd089a24
2 changed files with 29 additions and 3 deletions

View File

@@ -31,6 +31,8 @@ class FeatureExtractionPipeline(Pipeline):
If no framework is specified, will default to the one currently installed. If no framework is specified and If no framework is specified, will default to the one currently installed. If no framework is specified and
both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
provided. provided.
return_tensors (`bool`, *optional*):
If `True`, returns a tensor according to the specified framework, otherwise returns a list.
task (`str`, defaults to `""`): task (`str`, defaults to `""`):
A task-identifier for the pipeline. A task-identifier for the pipeline.
args_parser ([`~pipelines.ArgumentHandler`], *optional*): args_parser ([`~pipelines.ArgumentHandler`], *optional*):
@@ -40,7 +42,7 @@ class FeatureExtractionPipeline(Pipeline):
the associated CUDA device id. the associated CUDA device id.
""" """
def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, **kwargs): def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_tensors=None, **kwargs):
if tokenize_kwargs is None: if tokenize_kwargs is None:
tokenize_kwargs = {} tokenize_kwargs = {}
@@ -53,7 +55,11 @@ class FeatureExtractionPipeline(Pipeline):
preprocess_params = tokenize_kwargs preprocess_params = tokenize_kwargs
return preprocess_params, {}, {} postprocess_params = {}
if return_tensors is not None:
postprocess_params["return_tensors"] = return_tensors
return preprocess_params, {}, postprocess_params
def preprocess(self, inputs, **tokenize_kwargs) -> Dict[str, GenericTensor]: def preprocess(self, inputs, **tokenize_kwargs) -> Dict[str, GenericTensor]:
return_tensors = self.framework return_tensors = self.framework
@@ -64,8 +70,10 @@ class FeatureExtractionPipeline(Pipeline):
model_outputs = self.model(**model_inputs) model_outputs = self.model(**model_inputs)
return model_outputs return model_outputs
def postprocess(self, model_outputs): def postprocess(self, model_outputs, return_tensors=False):
# [0] is the first available tensor, logits or last_hidden_state. # [0] is the first available tensor, logits or last_hidden_state.
if return_tensors:
return model_outputs[0]
if self.framework == "pt": if self.framework == "pt":
return model_outputs[0].tolist() return model_outputs[0].tolist()
elif self.framework == "tf": elif self.framework == "tf":

View File

@@ -15,6 +15,8 @@
import unittest import unittest
import numpy as np import numpy as np
import tensorflow as tf
import torch
from transformers import ( from transformers import (
FEATURE_EXTRACTOR_MAPPING, FEATURE_EXTRACTOR_MAPPING,
@@ -133,6 +135,22 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
tokenize_kwargs=tokenize_kwargs, tokenize_kwargs=tokenize_kwargs,
) )
@require_torch
def test_return_tensors_pt(self):
    """Check that `return_tensors=True` makes the PyTorch pipeline return a tensor."""
    long_text = "This is a test" * 100
    extractor = pipeline(
        task="feature-extraction",
        model="hf-internal-testing/tiny-random-distilbert",
        framework="pt",
    )
    # With return_tensors=True the pipeline output must be a torch tensor.
    features = extractor(long_text, return_tensors=True)
    self.assertTrue(torch.is_tensor(features))
@require_tf
def test_return_tensors_tf(self):
    """Check that `return_tensors=True` makes the TensorFlow pipeline return a tensor."""
    long_text = "This is a test" * 100
    extractor = pipeline(
        task="feature-extraction",
        model="hf-internal-testing/tiny-random-distilbert",
        framework="tf",
    )
    # With return_tensors=True the pipeline output must be a TensorFlow tensor.
    features = extractor(long_text, return_tensors=True)
    self.assertTrue(tf.is_tensor(features))
def get_shape(self, input_, shape=None): def get_shape(self, input_, shape=None):
if shape is None: if shape is None:
shape = [] shape = []