From c4158a63141261ae51126990d6006f1c521ebc17 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Mon, 20 Apr 2020 20:39:16 +0200
Subject: [PATCH] [Pipelines] Encode to max length of input not max length of tokenizer for batch input (#3857)

* remove max_length = tokenizer.max_length when encoding

* make style
---
 src/transformers/pipelines.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 2b8084edba..3f8450ac14 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -425,11 +425,7 @@ class Pipeline(_ScikitCompat):
         # Parse arguments
         inputs = self._args_parser(*texts, **kwargs)
         inputs = self.tokenizer.batch_encode_plus(
-            inputs,
-            add_special_tokens=True,
-            return_tensors=self.framework,
-            max_length=self.tokenizer.max_len,
-            pad_to_max_length=pad_to_max_length,
+            inputs, add_special_tokens=True, return_tensors=self.framework, pad_to_max_length=pad_to_max_length,
         )
 
         return inputs