From 28d0ba35d73d5b8b31fdadd72686a3ac078a6143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Louf?= Date: Fri, 8 Nov 2019 11:22:19 +0100 Subject: [PATCH] only init encoder_attention_mask if stack is decoder We currently initialize `encoder_attention_mask` when it is `None`, whether the stack is that of an encoder or a decoder. Since this may lead to bugs that are difficult to track down, I added a condition that assesses whether the current stack is a decoder. --- transformers/modeling_bert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformers/modeling_bert.py b/transformers/modeling_bert.py index 7c2c6f4602..6bd5ab6a2e 100644 --- a/transformers/modeling_bert.py +++ b/transformers/modeling_bert.py @@ -656,7 +656,7 @@ class BertModel(BertPreTrainedModel): if attention_mask is None: attention_mask = torch.ones(input_shape, device=device) - if encoder_attention_mask is None: + if self.config.is_decoder and encoder_attention_mask is None: encoder_attention_mask = torch.ones(input_shape, device=device) if token_type_ids is None: token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)