From 8453201cfe8656654d3e0fdaf6dd6c0346ecfafe Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Thu, 3 Dec 2020 10:45:07 -0500 Subject: [PATCH] Avoid erasing the attention mask when double padding (#8915) --- src/transformers/tokenization_utils_base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index d8e73217ee..289e373da5 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3047,9 +3047,8 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): encoded_inputs["input_ids"] = [self.pad_token_id] * difference + encoded_inputs["input_ids"] else: raise ValueError("Invalid padding strategy:" + str(self.padding_side)) - else: - if return_attention_mask: - encoded_inputs["attention_mask"] = [1] * len(encoded_inputs["input_ids"]) + elif return_attention_mask and "attention_mask" not in encoded_inputs: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs["input_ids"]) return encoded_inputs