Replace all uses of tf.shape(...) and tensor.shape with the shape_list(...) helper

2019-11-28 15:51:43 +01:00
parent 1ab8dc44b3
commit adb5c79ff2
13 changed files with 48 additions and 54 deletions
--- a/transformers/modeling_tf_bert.py
+++ b/transformers/modeling_tf_bert.py
@@ -28,7 +28,7 @@ import numpy as np
 import tensorflow as tf

 from .configuration_bert import BertConfig
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer
+from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 from .file_utils import add_start_docstrings

 logger = logging.getLogger(__name__)
@@ -145,9 +145,9 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
        input_ids, position_ids, token_type_ids, inputs_embeds = inputs

        if input_ids is not None:
-            input_shape = tf.shape(input_ids)
+            input_shape = shape_list(input_ids)
        else:
-            input_shape = tf.shape(inputs_embeds)[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
        
        seq_length = input_shape[1]
        if position_ids is None:
@@ -172,8 +172,8 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
            Returns:
                float32 tensor with shape [batch_size, length, vocab_size].
        """
-        batch_size = tf.shape(inputs)[0]
-        length = tf.shape(inputs)[1]
+        batch_size = shape_list(inputs)[0]
+        length = shape_list(inputs)[1]

        x = tf.reshape(inputs, [-1, self.hidden_size])
        logits = tf.matmul(x, self.word_embeddings, transpose_b=True)
@@ -214,7 +214,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
    def call(self, inputs, training=False):
        hidden_states, attention_mask, head_mask = inputs

-        batch_size = tf.shape(hidden_states)[0]
+        batch_size = shape_list(hidden_states)[0]
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)
@@ -225,7 +225,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)  # (batch size, num_heads, seq_len_q, seq_len_k)
-        dk = tf.cast(tf.shape(key_layer)[-1], tf.float32) # scale attention_scores
+        dk = tf.cast(shape_list(key_layer)[-1], tf.float32) # scale attention_scores
        attention_scores = attention_scores / tf.math.sqrt(dk)

        if attention_mask is not None:
@@ -502,9 +502,9 @@ class TFBertMainLayer(tf.keras.layers.Layer):
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
-            input_shape = input_ids.shape
+            input_shape = shape_list(input_ids)
        elif inputs_embeds is not None:
-            input_shape = inputs_embeds.shape[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

@@ -939,11 +939,11 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
            input_ids = inputs

        if input_ids is not None:
-            num_choices = tf.shape(input_ids)[1]
-            seq_length = tf.shape(input_ids)[2]
+            num_choices = shape_list(input_ids)[1]
+            seq_length = shape_list(input_ids)[2]
        else:
-            num_choices = tf.shape(inputs_embeds)[1]
-            seq_length = tf.shape(inputs_embeds)[2]
+            num_choices = shape_list(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[2]

        flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
        flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None