Replace all tf.shape(...) calls and tensor.shape accesses with the shape_list(...) helper
This commit is contained in:
@@ -28,7 +28,7 @@ import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from .configuration_bert import BertConfig
|
||||
from .modeling_tf_utils import TFPreTrainedModel, get_initializer
|
||||
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
|
||||
from .file_utils import add_start_docstrings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -145,9 +145,9 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
|
||||
input_ids, position_ids, token_type_ids, inputs_embeds = inputs
|
||||
|
||||
if input_ids is not None:
|
||||
input_shape = tf.shape(input_ids)
|
||||
input_shape = shape_list(input_ids)
|
||||
else:
|
||||
input_shape = tf.shape(inputs_embeds)[:-1]
|
||||
input_shape = shape_list(inputs_embeds)[:-1]
|
||||
|
||||
seq_length = input_shape[1]
|
||||
if position_ids is None:
|
||||
@@ -172,8 +172,8 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
|
||||
Returns:
|
||||
float32 tensor with shape [batch_size, length, vocab_size].
|
||||
"""
|
||||
batch_size = tf.shape(inputs)[0]
|
||||
length = tf.shape(inputs)[1]
|
||||
batch_size = shape_list(inputs)[0]
|
||||
length = shape_list(inputs)[1]
|
||||
|
||||
x = tf.reshape(inputs, [-1, self.hidden_size])
|
||||
logits = tf.matmul(x, self.word_embeddings, transpose_b=True)
|
||||
@@ -214,7 +214,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
|
||||
def call(self, inputs, training=False):
|
||||
hidden_states, attention_mask, head_mask = inputs
|
||||
|
||||
batch_size = tf.shape(hidden_states)[0]
|
||||
batch_size = shape_list(hidden_states)[0]
|
||||
mixed_query_layer = self.query(hidden_states)
|
||||
mixed_key_layer = self.key(hidden_states)
|
||||
mixed_value_layer = self.value(hidden_states)
|
||||
@@ -225,7 +225,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
|
||||
|
||||
# Take the dot product between "query" and "key" to get the raw attention scores.
|
||||
attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True) # (batch size, num_heads, seq_len_q, seq_len_k)
|
||||
dk = tf.cast(tf.shape(key_layer)[-1], tf.float32) # scale attention_scores
|
||||
dk = tf.cast(shape_list(key_layer)[-1], tf.float32) # scale attention_scores
|
||||
attention_scores = attention_scores / tf.math.sqrt(dk)
|
||||
|
||||
if attention_mask is not None:
|
||||
@@ -502,9 +502,9 @@ class TFBertMainLayer(tf.keras.layers.Layer):
|
||||
if input_ids is not None and inputs_embeds is not None:
|
||||
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
|
||||
elif input_ids is not None:
|
||||
input_shape = input_ids.shape
|
||||
input_shape = shape_list(input_ids)
|
||||
elif inputs_embeds is not None:
|
||||
input_shape = inputs_embeds.shape[:-1]
|
||||
input_shape = shape_list(inputs_embeds)[:-1]
|
||||
else:
|
||||
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
||||
|
||||
@@ -939,11 +939,11 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
|
||||
input_ids = inputs
|
||||
|
||||
if input_ids is not None:
|
||||
num_choices = tf.shape(input_ids)[1]
|
||||
seq_length = tf.shape(input_ids)[2]
|
||||
num_choices = shape_list(input_ids)[1]
|
||||
seq_length = shape_list(input_ids)[2]
|
||||
else:
|
||||
num_choices = tf.shape(inputs_embeds)[1]
|
||||
seq_length = tf.shape(inputs_embeds)[2]
|
||||
num_choices = shape_list(inputs_embeds)[1]
|
||||
seq_length = shape_list(inputs_embeds)[2]
|
||||
|
||||
flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
|
||||
flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
|
||||
|
||||
Reference in New Issue
Block a user