From 6b24281229e4ec0754f79735e398730d9d58e8af Mon Sep 17 00:00:00 2001 From: Prajjwal Bhargava Date: Wed, 2 Sep 2020 16:25:37 +0530 Subject: [PATCH] fix typo in comments (#6838) --- src/transformers/modeling_bert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/modeling_bert.py b/src/transformers/modeling_bert.py index 8d70744e7b..dd8add738d 100755 --- a/src/transformers/modeling_bert.py +++ b/src/transformers/modeling_bert.py @@ -803,8 +803,8 @@ class BertModel(BertPreTrainedModel): # ourselves in which case we just need to make it broadcastable to all heads. extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device) - # If a 2D ou 3D attention mask is provided for the cross-attention - # we need to make broadcastabe to [batch_size, num_heads, seq_length, seq_length] + # If a 2D or 3D attention mask is provided for the cross-attention + # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length] if self.config.is_decoder and encoder_hidden_states is not None: encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size() encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)