diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 247ee395dc..c7f76b175b 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -44,7 +44,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -188,16 +188,7 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 6e29434c4d..39537b88bf 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -40,7 +40,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -763,16 +763,7 @@ class TFBartEncoder(tf.keras.layers.Layer): if 
hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -965,16 +956,7 @@ class TFBartDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 5391d71a91..50ff7f2ddd 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -49,7 +49,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -198,16 +198,7 @@ class TFBertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 6b95bd5673..ee5755c203 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -38,7 +38,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -746,16 +746,7 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -956,16 +947,7 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 3d521ea77a..e170085e91 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -37,7 +37,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -752,16 +752,7 @@ class 
TFBlenderbotSmallEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -961,16 +952,7 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index e166f40b9e..6ae7a2503c 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -29,7 +29,7 @@ from ...modeling_tf_utils import ( shape_list, unpack_inputs, ) -from ...tf_utils import stable_softmax +from ...tf_utils import check_embeddings_within_bounds, stable_softmax from ...utils import ( ModelOutput, add_start_docstrings, @@ -316,16 +316,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index 262b2cb279..6e8ed8a891 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -32,7 +32,7 @@ from ...modeling_tf_utils import ( shape_list, unpack_inputs, ) -from ...tf_utils import invert_attention_mask, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, invert_attention_mask, stable_softmax from ...utils import add_start_docstrings_to_model_forward, logging from .configuration_blip import BlipTextConfig @@ -112,16 +112,7 @@ class TFBlipTextEmbeddings(tf.keras.layers.Layer): position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length] if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = self.word_embeddings(input_ids) embeddings = inputs_embeds diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index 5142b3d82b..c9e4c98c14 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -239,16 +239,7 @@ class TFCamembertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index d2e1b06e57..7cf52500ae 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -34,7 +34,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ModelOutput, add_start_docstrings, @@ -238,16 +238,7 @@ class TFCLIPTextEmbeddings(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index 3976be69eb..e853da7627 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -42,7 +42,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, @@ -124,16 +124,7 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index dcd3f5a03e..f4742b4e33 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -32,7 +32,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging from .configuration_ctrl import CTRLConfig @@ -336,16 +336,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.w.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.w.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.w.vocab_size) inputs_embeds = self.w(input_ids, mode="embedding") seq_len = input_shape[-1] mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0) diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 016ce15db6..dcd0582777 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -39,7 +39,7 @@ from ...modeling_tf_utils import ( get_initializer, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging from .configuration_deberta import DebertaConfig @@ -778,16 +778,7 @@ class TFDebertaEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index 015eb39257..b3c210352a 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -38,7 +38,7 @@ from ...modeling_tf_utils import ( get_initializer, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging from .configuration_deberta_v2 import DebertaV2Config @@ -867,16 +867,7 @@ class TFDebertaV2Embeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 95c3aef426..3013f4ca30 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -43,7 +43,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, @@ -109,16 +109,7 @@ class TFEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index b782cc987b..82c3381724 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -44,7 +44,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -528,16 +528,7 @@ class TFElectraEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index 980b6453f6..135c16a14b 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -40,7 +40,7 @@ from ...modeling_tf_utils import ( shape_list, unpack_inputs, ) -from ...tf_utils import stable_softmax +from ...tf_utils import check_embeddings_within_bounds, stable_softmax from ...utils import logging from .configuration_esm import EsmConfig @@ -214,16 +214,7 @@ class TFEsmEmbeddings(Layer): position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = self.word_embeddings(input_ids) # Note that if we want to support ESM-1 (not 1b!) 
in future then we need to support an diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 919cd6cc1e..b1dd523ded 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -578,16 +578,7 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): # embeddings if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embeddings.vocab_size) inputs_embeds = self.embeddings(input_ids) tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids) diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 2b109cdbab..84254f2b28 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -42,7 +42,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -109,16 +109,7 @@ class 
TFFunnelEmbeddings(tf.keras.layers.Layer): assert not (input_ids is not None and inputs_embeds is not None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(self.weight, input_ids) final_embeddings = self.LayerNorm(inputs=inputs_embeds) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index a84fdbd806..d0c731878d 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -39,7 +39,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, ModelOutput, @@ -437,16 +437,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = self.wte(input_ids, mode="embedding") position_embeds = tf.gather(self.wpe, position_ids) diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index f077a52a03..fbef4f0eff 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -43,7 +43,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import logging from .configuration_gptj import GPTJConfig @@ -437,16 +437,7 @@ class TFGPTJMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.wte.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.wte.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.wte.vocab_size) inputs_embeds = self.wte(input_ids, mode="embedding") if token_type_ids is not None: diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 3826b83e7a..4891931c20 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -33,7 +33,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ModelOutput, add_start_docstrings, @@ -572,16 +572,7 @@ class TFGroupViTTextEmbeddings(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 2097ae58b8..2755e05537 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -41,7 +41,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings from .configuration_layoutlm import LayoutLMConfig @@ -140,16 +140,7 @@ class TFLayoutLMEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 95ef5580b9..491ef186e5 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -36,6 +36,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) +from ...tf_utils import check_embeddings_within_bounds from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, replace_return_docstrings from .configuration_layoutlmv3 import LayoutLMv3Config @@ -240,16 +241,7 @@ class TFLayoutLMv3TextEmbeddings(tf.keras.layers.Layer): token_type_ids = tf.zeros(input_shape, dtype=position_ids.dtype) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.word_embeddings.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.word_embeddings.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.word_embeddings.input_dim) inputs_embeds = self.word_embeddings(input_ids) token_type_embeddings = self.token_type_embeddings(token_type_ids) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 75d4a15f19..324482b4d2 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -33,7 +33,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, ModelOutput, @@ -1746,16 +1746,7 @@ class TFLEDEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) elif inputs_embeds is not None: input_shape = shape_list(inputs_embeds)[:-1] @@ -2038,16 +2029,7 @@ class TFLEDDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) hidden_states = inputs_embeds diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index e5e22a2127..c47df16965 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -34,7 +34,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -538,16 +538,7 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and 
inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 9408188100..948053c93e 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -32,7 +32,7 @@ from ...modeling_tf_utils import ( shape_list, unpack_inputs, ) -from ...tf_utils import stable_softmax +from ...tf_utils import check_embeddings_within_bounds, stable_softmax from ...utils import ( ModelOutput, add_code_sample_docstrings, @@ -232,16 +232,7 @@ class TFLxmertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index a0e26de9bd..1751158832 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -37,7 +37,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -778,16 +778,7 @@ class TFMarianEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -990,16 +981,7 @@ class TFMarianDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 6f48062fc6..13453bd22d 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -37,7 +37,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -770,16 +770,7 @@ class TFMBartEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): 
context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -989,16 +980,7 @@ class TFMBartDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 832a4fa3f5..c47cde847d 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -212,16 +212,7 @@ class TFMobileBertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 48866e21d4..08db310173 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -45,7 +45,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, @@ -144,16 +144,7 @@ class TFMPNetEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 5723001729..7c04520c9c 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -35,7 +35,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ModelOutput, add_code_sample_docstrings, @@ -295,30 +295,12 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = self.tokens_embed(input_ids, mode="embedding") position_embeds = tf.gather(self.positions_embed, position_ids) if token_type_ids is not None: token_type_ids = tf.reshape(token_type_ids, [-1, shape_list(token_type_ids)[-1]]) - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - token_type_ids, - tf.cast(self.config.vocab_size, dtype=token_type_ids.dtype), - message=( - "token_type_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(token_type_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(token_type_ids, self.config.vocab_size, "token_type_ids") token_type_embeds = self.tokens_embed(token_type_ids, mode="embedding") else: token_type_embeds = 0 diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 4f738b1605..1855fcb1bc 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -33,7 +33,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( add_code_sample_docstrings, add_start_docstrings, @@ -631,16 +631,7 @@ class TFOPTDecoder(tf.keras.layers.Layer): past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0 if inputs_embeds is None: - # Note: tf.gather, on which the 
embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.vocab_size) inputs_embeds = self.embed_tokens(input_ids) if attention_mask is None: diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 5955d50d61..1ccccc2dc5 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -38,7 +38,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ContextManagers, add_code_sample_docstrings, @@ -782,16 +782,7 @@ class TFPegasusEncoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -997,16 +988,7 @@ class TFPegasusDecoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 74683dfc0c..c4dc8c5a14 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -45,7 +45,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -122,16 +122,7 @@ class TFRemBertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is 
None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 606afb754b..7aa2c9e07a 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -144,16 +144,7 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py index 1843605bd0..fedfea56a7 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -149,16 +149,7 @@ class TFRobertaPreLayerNormEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 952250e68a..2d1387d2d8 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, @@ -175,16 +175,7 @@ class TFRoFormerEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 009d2538ea..e5c38afa83 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -36,7 +36,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( add_code_sample_docstrings, add_start_docstrings, @@ -1030,16 +1030,7 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0 if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.vocab_size) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale else: inputs_embeds = inputs_embeds diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index f9996e1531..ec3e67db26 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -40,7 +40,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, DUMMY_MASK, @@ -686,16 +686,7 @@ class TFT5MainLayer(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) batch_size, seq_length = input_shape diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 5c995aa930..f876730b09 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -38,7 +38,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( ModelOutput, add_start_docstrings, @@ -231,16 +231,7 @@ class TFTapasEmbeddings(tf.keras.layers.Layer): position_ids = tf.math.minimum(self.max_position_embeddings - 1, position - first_position) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) position_embeddings = tf.gather(self.position_embeddings, indices=position_ids) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index d9a175062b..0d2a2682cc 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -36,7 +36,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings from .configuration_whisper import WhisperConfig @@ -882,16 +882,7 @@ class TFWhisperDecoder(tf.keras.layers.Layer): past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) attention_mask = self._prepare_decoder_attention_mask(attention_mask, input_shape, past_key_values_length) diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index d112e641a9..1a0146bf19 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -42,7 +42,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import logging from .configuration_xglm import XGLMConfig @@ -527,16 +527,7 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.vocab_size) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale attention_mask = self._prepare_decoder_attention_mask(attention_mask, input_shape, past_key_values_length) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index f77111cee4..da9bd1c603 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -45,7 +45,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -440,16 +440,7 @@ class TFXLMMainLayer(tf.keras.layers.Layer): # embeddings if inputs_embeds is None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embeddings.vocab_size) inputs_embeds = self.embeddings(input_ids) tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids) diff --git a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py index b5fc694148..2f51c032f1 100644 --- a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py @@ -46,7 +46,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( DUMMY_INPUTS, MULTIPLE_CHOICE_DUMMY_INPUTS, @@ -233,16 +233,7 @@ class TFXLMRobertaEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.config.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.config.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.config.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 080dd91f23..52538ced57 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -39,7 +39,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, @@ -678,16 +678,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): if inputs_embeds is not None: word_emb_k = inputs_embeds else: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.word_embedding.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.word_embedding.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.word_embedding.vocab_size) word_emb_k = self.word_embedding(input_ids) output_h = self.dropout(word_emb_k, training=training) if target_mapping is not None: diff --git a/src/transformers/tf_utils.py b/src/transformers/tf_utils.py index 20fe71d6ae..2d4fa6fda9 100644 --- a/src/transformers/tf_utils.py +++ b/src/transformers/tf_utils.py @@ -96,3 +96,23 @@ def invert_attention_mask(encoder_attention_mask: tf.Tensor) -> tf.Tensor: ) * encoder_extended_attention_mask.dtype.min return encoder_extended_attention_mask + + +def check_embeddings_within_bounds(tensor: tf.Tensor, embed_dim: int, tensor_name: str = "input_ids") -> None: + """ + `tf.gather`, on which TF embedding layers are based, won't check positive out of bound indices on GPU, returning + zeros instead. This function adds a check against that dangerous silent behavior. + + Args: + tensor (`tf.Tensor`): The tensor of indices to check. + embed_dim (`int`): The embedding layer's input dimension (typically the vocabulary size). + tensor_name (`str`, *optional*): The name of the tensor to use in the error message. + """ + tf.debugging.assert_less( + tensor, + tf.cast(embed_dim, dtype=tensor.dtype), + message=( + f"The maximum value of {tensor_name} ({tf.math.reduce_max(tensor)}) must be smaller than the embedding " + f"layer's input dimension ({embed_dim}). The likely cause is some problem at tokenization time."
+ ), + ) diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index df6adc3c4d..ffe5e7de95 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -53,7 +53,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import logging from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config @@ -126,16 +126,7 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.vocab_size, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" - ), - ) + check_embeddings_within_bounds(input_ids, self.vocab_size) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] @@ -1670,7 +1661,7 @@ from ...modeling_tf_utils import ( keras_serializable, unpack_inputs, ) -from ...tf_utils import shape_list, stable_softmax +from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ContextManagers, logging from .configuration_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Config @@ -2311,16 +2302,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. 
- tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -2518,16 +2500,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer): if hasattr(self.embed_tokens, "load_weight_prefix"): context.append(tf.name_scope(self.embed_tokens.load_weight_prefix + "/")) with ContextManagers(context): - # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound - # indices on GPU, returning zeros instead. This is a dangerous silent behavior. - tf.debugging.assert_less( - input_ids, - tf.cast(self.embed_tokens.input_dim, dtype=input_ids.dtype), - message=( - "input_ids must be smaller than the embedding layer's input dimension (got" - f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.input_dim})" - ), - ) + check_embeddings_within_bounds(input_ids, self.embed_tokens.input_dim) inputs_embeds = self.embed_tokens(input_ids) hidden_states = inputs_embeds