From a163c9ca5b720adeac1f3f4cbb83a7ef6112061a Mon Sep 17 00:00:00 2001 From: ZhuBaohe Date: Tue, 26 May 2020 20:57:24 +0800 Subject: [PATCH] [T5] Fix Cross Attention position bias (#4499) * fix * fix1 --- src/transformers/modeling_t5.py | 2 +- src/transformers/modeling_tf_t5.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/modeling_t5.py b/src/transformers/modeling_t5.py index b363316337..f5b74e7a79 100644 --- a/src/transformers/modeling_t5.py +++ b/src/transformers/modeling_t5.py @@ -745,7 +745,7 @@ class T5Stack(T5PreTrainedModel): # layer_outputs = hidden-states, key-value-states (self-attention weights), (self-attention position bias), (cross-attention weights), (cross-attention position bias) position_bias = layer_outputs[3 if self.output_attentions else 2] if self.is_decoder and encoder_hidden_states is not None: - encoder_decoder_position_bias = layer_outputs[4 if self.output_attentions else 3] + encoder_decoder_position_bias = layer_outputs[5 if self.output_attentions else 3] # append next layer key value states present_key_value_states = present_key_value_states + (present_key_value_state,) diff --git a/src/transformers/modeling_tf_t5.py b/src/transformers/modeling_tf_t5.py index 64583f23fd..8f429bb354 100644 --- a/src/transformers/modeling_tf_t5.py +++ b/src/transformers/modeling_tf_t5.py @@ -682,7 +682,7 @@ class TFT5MainLayer(tf.keras.layers.Layer): # layer_outputs = hidden-states, (self-attention weights), (self-attention position bias), (cross-attention weights), (cross-attention position bias) position_bias = layer_outputs[3 if self.output_attentions else 2] if self.is_decoder and encoder_hidden_states is not None: - encoder_decoder_position_bias = layer_outputs[4 if self.output_attentions else 3] + encoder_decoder_position_bias = layer_outputs[5 if self.output_attentions else 3] # append next layer key value states present_key_value_states = present_key_value_states + (present_key_value_state,)