From 1b5ab39cf4430253e70570cb9b6bacbb5c595133 Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Thu, 22 Sep 2022 13:21:51 +0100 Subject: [PATCH] TF: check embeddings range (#19102) --- src/transformers/modeling_tf_utils.py | 1 + .../models/albert/modeling_tf_albert.py | 10 +++++++ .../models/bert/modeling_tf_bert.py | 10 +++++++ .../blenderbot/modeling_tf_blenderbot.py | 20 +++++++++++++ .../modeling_tf_blenderbot_small.py | 20 +++++++++++++ .../models/clip/modeling_tf_clip.py | 10 +++++++ .../models/convbert/modeling_tf_convbert.py | 10 +++++++ .../models/ctrl/modeling_tf_ctrl.py | 10 +++++++ .../models/deberta/modeling_tf_deberta.py | 10 +++++++ .../deberta_v2/modeling_tf_deberta_v2.py | 10 +++++++ .../distilbert/modeling_tf_distilbert.py | 10 +++++++ .../models/electra/modeling_tf_electra.py | 10 +++++++ .../models/flaubert/modeling_tf_flaubert.py | 10 +++++++ .../models/funnel/modeling_tf_funnel.py | 10 +++++++ .../models/gpt2/modeling_tf_gpt2.py | 10 +++++++ .../models/gptj/modeling_tf_gptj.py | 10 +++++++ .../models/layoutlm/modeling_tf_layoutlm.py | 10 +++++++ .../layoutlmv3/modeling_tf_layoutlmv3.py | 10 +++++++ .../models/led/modeling_tf_led.py | 20 +++++++++++++ .../longformer/modeling_tf_longformer.py | 10 +++++++ .../models/lxmert/modeling_tf_lxmert.py | 10 +++++++ .../models/marian/modeling_tf_marian.py | 20 +++++++++++++ .../models/mbart/modeling_tf_mbart.py | 20 +++++++++++++ .../mobilebert/modeling_tf_mobilebert.py | 10 +++++++ .../models/mpnet/modeling_tf_mpnet.py | 10 +++++++ .../models/openai/modeling_tf_openai.py | 20 +++++++++++++ .../models/opt/modeling_tf_opt.py | 10 +++++++ .../models/pegasus/modeling_tf_pegasus.py | 20 +++++++++++++ .../models/rembert/modeling_tf_rembert.py | 10 +++++++ .../models/roberta/modeling_tf_roberta.py | 10 +++++++ .../models/roformer/modeling_tf_roformer.py | 10 +++++++ .../modeling_tf_speech_to_text.py | 10 +++++++ src/transformers/models/t5/modeling_tf_t5.py | 10 +++++++ .../models/xglm/modeling_tf_xglm.py | 10 +++++++ .../models/xlm/modeling_tf_xlm.py | 10 +++++++ .../models/xlnet/modeling_tf_xlnet.py | 10 +++++++ ...tf_{{cookiecutter.lowercase_modelname}}.py | 30 +++++++++++++++++++ 37 files changed, 451 insertions(+) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index d06c619cdf..c0f1559286 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -3054,6 +3054,7 @@ class TFWrappedEmbeddings: def __init__(self, layer, abs_scope_name=None): self._layer = layer self._abs_scope_name = abs_scope_name + self.vocab_size = self._layer.vocab_size def call(self, inputs, mode="embedding"): if self._abs_scope_name is None: diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 5bef3cc39c..f3f1169554 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -190,6 +190,16 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index b1240b4763..12c810d012 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -200,6 +200,16 @@ class TFBertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index c56b30cf83..73a1cc0226 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -726,6 +726,16 @@ class TFBlenderbotEncoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -923,6 +933,16 @@ class TFBlenderbotDecoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, position_ids=position_ids) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 8383dc0972..cd8e7351ba 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -731,6 +731,16 @@ class TFBlenderbotSmallEncoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -921,6 +931,16 @@ class TFBlenderbotSmallDecoder(tf.keras.layers.Layer): past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0 if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index df49006210..6edd09c2e1 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -241,6 +241,16 @@ class TFCLIPTextEmbeddings(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index f69e54282d..b778b73c96 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -126,6 +126,16 @@ class TFConvBertEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index 0cfcb44f65..81872b9671 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -338,6 +338,16 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.w.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.w.vocab_size})" + ), + ) inputs_embeds = self.w(input_ids, mode="embedding") seq_len = input_shape[-1] mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0) diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 652389f3b5..ff8e41d3ab 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -783,6 +783,16 @@ class TFDebertaEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index bb1d55692c..3890731b4d 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -872,6 +872,16 @@ class TFDebertaV2Embeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index bc1df7f41a..07ad3593a2 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -110,6 +110,16 @@ class TFEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index d2426cae9c..8ed75b3bff 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -530,6 +530,16 @@ class TFElectraEmbeddings(tf.keras.layers.Layer): raise ValueError("Need to provide either `input_ids` or `input_embeds`.") if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index b33e057232..3601c1cb9a 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -573,6 +573,16 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): # embeddings if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})" + ), + ) inputs_embeds = self.embeddings(input_ids) tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids) diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index da71be87d9..2ec5debbf0 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -110,6 +110,16 @@ class TFFunnelEmbeddings(tf.keras.layers.Layer): assert not (input_ids is not None and inputs_embeds is not None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(self.weight, input_ids) final_embeddings = self.LayerNorm(inputs=inputs_embeds) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 5b1e21a3c2..cd3f410b6a 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -442,6 +442,16 @@ class TFGPT2MainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = self.wte(input_ids, mode="embedding") position_embeds = tf.gather(self.wpe, position_ids) diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index 67d17cdc05..d7fa53bfe9 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -440,6 +440,16 @@ class TFGPTJMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.wte.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.wte.vocab_size})" + ), + ) inputs_embeds = self.wte(input_ids, mode="embedding") if token_type_ids is not None: diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 26ae260dd1..74edd11009 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -141,6 +141,16 @@ class TFLayoutLMEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 6bb3eb54e1..c85989913e 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -240,6 +240,16 @@ class TFLayoutLMv3TextEmbeddings(tf.keras.layers.Layer): token_type_ids = tf.zeros(input_shape, dtype=position_ids.dtype) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.word_embeddings.input_dim, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.word_embeddings.input_dim})" + ), + ) inputs_embeds = self.word_embeddings(input_ids) token_type_embeddings = self.token_type_embeddings(token_type_ids) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 3702f1cca3..0a803212da 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1737,6 +1737,16 @@ class TFLEDEncoder(tf.keras.layers.Layer): raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") elif input_ids is not None: input_shape = shape_list(input_ids) + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) elif inputs_embeds is not None: input_shape = shape_list(inputs_embeds)[:-1] @@ -2012,6 +2022,16 @@ class TFLEDDecoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, past_key_values_length) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) hidden_states = inputs_embeds diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 51b8cca0b0..489bb113f9 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -540,6 +540,16 @@ class TFLongformerEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 3b44056a6c..8c61a7d868 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -227,6 +227,16 @@ class TFLxmertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 580c38f843..a88cbe7b73 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -772,6 +772,16 @@ class TFMarianEncoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -967,6 +977,16 @@ class TFMarianDecoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, position_ids=position_ids) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index ec034a5fa8..e870372b7a 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -757,6 +757,16 @@ class TFMBartEncoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -959,6 +969,16 @@ class TFMBartDecoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, position_ids=position_ids) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 25fe932746..f6138b7ce5 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -214,6 +214,16 @@ class TFMobileBertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 3ceb1489a9..5aca157571 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -145,6 +145,16 @@ class TFMPNetEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 357866228b..74aa798dfa 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -298,10 +298,30 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer): position_ids = tf.reshape(position_ids, [-1, shape_list(position_ids)[-1]]) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = self.tokens_embed(input_ids, mode="embedding") position_embeds = tf.gather(self.positions_embed, position_ids) if token_type_ids is not None: token_type_ids = tf.reshape(token_type_ids, [-1, shape_list(token_type_ids)[-1]]) + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + token_type_ids, + tf.cast(self.vocab_size, dtype=token_type_ids.dtype), + message=( + "token_type_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(token_type_ids)} >= {self.vocab_size})" + ), + ) token_type_embeds = self.tokens_embed(token_type_ids, mode="embedding") else: token_type_embeds = 0 diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index b523ecceb1..43cc952387 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -632,6 +632,16 @@ class TFOPTDecoder(tf.keras.layers.Layer): past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0 if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) if attention_mask is None: diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 97efed9285..a4eeabb41d 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -775,6 +775,16 @@ class TFPegasusEncoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -973,6 +983,16 @@ class TFPegasusDecoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, position_ids=position_ids) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale hidden_states = inputs_embeds diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index be1946f111..ce1c45d479 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -124,6 +124,16 @@ class TFRemBertEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 04f4283b20..4b34cadbf1 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -146,6 +146,16 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 4c526b6941..437aca26a0 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -177,6 +177,16 @@ class TFRoFormerEmbeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index e6e1b0facc..ef37691448 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -1022,6 +1022,16 @@ class TFSpeech2TextDecoder(tf.keras.layers.Layer): past_key_values_length = shape_list(past_key_values[0][0])[2] if past_key_values is not None else 0 if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale else: inputs_embeds = inputs_embeds diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index ae39ea2615..b83a0fad2b 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -681,6 +681,16 @@ class TFT5MainLayer(tf.keras.layers.Layer): if inputs_embeds is None: assert self.embed_tokens is not None, "You have to initialize the model with valid token embeddings" + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) batch_size, seq_length = input_shape diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 6dd62d270b..9856b16112 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -533,6 +533,16 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale attention_mask = self._prepare_decoder_attention_mask(attention_mask, input_shape, past_key_values_length) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index a060986a57..8bc0925c2f 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -440,6 +440,16 @@ class TFXLMMainLayer(tf.keras.layers.Layer): # embeddings if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embeddings.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embeddings.vocab_size})" + ), + ) inputs_embeds = self.embeddings(input_ids) tensor = inputs_embeds + tf.gather(self.position_embeddings, position_ids) diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 1b079d00be..a55a3c72a5 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -680,6 +680,16 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): if inputs_embeds is not None: word_emb_k = inputs_embeds else: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.word_embedding.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.word_embedding.vocab_size})" + ), + ) word_emb_k = self.word_embedding(input_ids) output_h = self.dropout(word_emb_k, training=training) if target_mapping is not None: diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index 8225105ddf..b4ab100fe8 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -127,6 +127,16 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer): assert not (input_ids is None and inputs_embeds is None) if input_ids is not None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.vocab_size})" + ), + ) inputs_embeds = tf.gather(params=self.weight, indices=input_ids) input_shape = shape_list(inputs_embeds)[:-1] @@ -2305,6 +2315,16 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): raise ValueError("You have to specify either input_ids or inputs_embeds") if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale embed_pos = self.embed_positions(input_shape) @@ -2494,6 +2514,16 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer): positions = self.embed_positions(input_shape, past_key_values_length) if inputs_embeds is None: + # Note: tf.gather, on which the embedding layer is based, won't check positive out of bound + # indices on GPU, returning zeros instead. This is a dangerous silent behavior. + tf.debugging.assert_less( + input_ids, + tf.cast(self.embed_tokens.vocab_size, dtype=input_ids.dtype), + message=( + "input_ids must be smaller than the embedding layer's input dimension (got" + f" {tf.math.reduce_max(input_ids)} >= {self.embed_tokens.vocab_size})" + ), + ) inputs_embeds = self.embed_tokens(input_ids) hidden_states = inputs_embeds