Fix E714 flake8 warning (x8): write `x is not None` instead of `not x is None`.
This commit is contained in:
@@ -519,7 +519,7 @@ class MultiHeadedAttention(nn.Module):
|
|||||||
|
|
||||||
attn = self.softmax(scores)
|
attn = self.softmax(scores)
|
||||||
|
|
||||||
if not predefined_graph_1 is None:
|
if predefined_graph_1 is not None:
|
||||||
attn_masked = attn[:, -1] * predefined_graph_1
|
attn_masked = attn[:, -1] * predefined_graph_1
|
||||||
attn_masked = attn_masked / (torch.sum(attn_masked, 2).unsqueeze(2) + 1e-9)
|
attn_masked = attn_masked / (torch.sum(attn_masked, 2).unsqueeze(2) + 1e-9)
|
||||||
|
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -686,7 +686,7 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -562,7 +562,7 @@ class TFBertMainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -311,7 +311,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -303,7 +303,7 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -456,7 +456,7 @@ class TFT5MainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
|
||||||
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.num_hidden_layers
|
head_mask = [None] * self.num_hidden_layers
|
||||||
|
|||||||
@@ -554,7 +554,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
|
|||||||
# attention_probs has shape bsz x n_heads x N x N
|
# attention_probs has shape bsz x n_heads x N x N
|
||||||
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
|
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
|
||||||
# and head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
|
# and head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
|
||||||
if not head_mask is None:
|
if head_mask is not None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
head_mask = [None] * self.n_layer
|
head_mask = [None] * self.n_layer
|
||||||
|
|||||||
Reference in New Issue
Block a user