From de4159a318f9a4f2f474733306c00ac326e643a7 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 28 Nov 2022 13:24:44 +0000 Subject: [PATCH] More TF int dtype fixes (#20384) * Add a test to ensure int dummy inputs are int64 * Move the test into the existing int64 test and update a lot of existing dummies * Fix remaining dummies * Fix remaining dummies * Test for int64 serving sigs as well * Update core tests to use tf.int64 * Add better messages to the assertions * Update all serving sigs to int64 * More sneaky hiding tf.int32s * Add an optional int32 signature in save_pretrained * make fixup * Add Amy's suggestions * Switch all serving sigs back to tf.int32 * Switch all dummies to tf.int32 * Adjust tests to check for tf.int32 instead of tf.int64 * Fix base dummy_inputs dtype * Start casting to tf.int32 in input_processing * Change dtype for unpack_inputs test * Add proper tf.int32 test * Make the alternate serving signature int64 --- src/transformers/modeling_tf_utils.py | 41 +++++++++++++++---- .../models/albert/modeling_tf_albert.py | 2 +- .../models/bart/modeling_tf_bart.py | 14 +++---- .../models/bert/modeling_tf_bert.py | 10 ++--- .../blenderbot/modeling_tf_blenderbot.py | 6 +-- .../modeling_tf_blenderbot_small.py | 6 +-- .../models/camembert/modeling_tf_camembert.py | 8 ++-- .../models/clip/modeling_tf_clip.py | 4 +- .../models/convbert/modeling_tf_convbert.py | 8 ++-- .../distilbert/modeling_tf_distilbert.py | 6 +-- .../models/dpr/modeling_tf_dpr.py | 4 +- .../models/electra/modeling_tf_electra.py | 4 +- .../modeling_tf_encoder_decoder.py | 2 +- .../models/esm/modeling_tf_esm.py | 16 ++++---- .../models/flaubert/modeling_tf_flaubert.py | 12 +++--- .../models/funnel/modeling_tf_funnel.py | 6 +-- .../models/gpt2/modeling_tf_gpt2.py | 6 +-- .../models/gptj/modeling_tf_gptj.py | 6 +-- .../models/groupvit/modeling_tf_groupvit.py | 8 ++-- .../models/hubert/modeling_tf_hubert.py | 4 +- .../layoutlmv3/modeling_tf_layoutlmv3.py | 6 +-- .../models/led/modeling_tf_led.py | 14 +++---- .../longformer/modeling_tf_longformer.py | 20 ++++----- .../models/lxmert/modeling_tf_lxmert.py | 12 +++--- .../models/marian/modeling_tf_marian.py | 2 +- .../models/mbart/modeling_tf_mbart.py | 2 +- .../mobilebert/modeling_tf_mobilebert.py | 2 +- .../models/mpnet/modeling_tf_mpnet.py | 6 +-- .../models/openai/modeling_tf_openai.py | 4 +- .../models/opt/modeling_tf_opt.py | 8 ++-- .../models/pegasus/modeling_tf_pegasus.py | 6 +-- .../models/rembert/modeling_tf_rembert.py | 10 ++--- .../models/roberta/modeling_tf_roberta.py | 8 ++-- .../models/roformer/modeling_tf_roformer.py | 8 ++-- .../modeling_tf_speech_to_text.py | 6 +-- src/transformers/models/t5/modeling_tf_t5.py | 14 +++---- .../models/tapas/modeling_tf_tapas.py | 4 +- .../transfo_xl/modeling_tf_transfo_xl.py | 2 +- .../modeling_tf_vision_encoder_decoder.py | 2 +- .../models/wav2vec2/modeling_tf_wav2vec2.py | 4 +- .../models/whisper/modeling_tf_whisper.py | 6 +-- .../models/xglm/modeling_tf_xglm.py | 6 +-- .../models/xlm/modeling_tf_xlm.py | 12 +++--- .../models/xlnet/modeling_tf_xlnet.py | 8 ++-- ...tf_{{cookiecutter.lowercase_modelname}}.py | 4 +- tests/test_modeling_tf_common.py | 28 +++++++++++-- tests/utils/test_modeling_tf_core.py | 5 +++ 47 files changed, 215 insertions(+), 167 deletions(-) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index e8c87d83f1..fa6df4e40a 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -585,10 +585,10 @@ def input_processing(func, config, **kwargs): cast_output = dict() for key, val in output.items(): - if isinstance(val, tf.Tensor) and val.dtype == tf.int32: - cast_output[key] = tf.cast(val, tf.int64) - elif isinstance(val, np.ndarray) and val.dtype == np.int32: - cast_output[key] = val.astype(np.int64) + if isinstance(val, tf.Tensor) and val.dtype == tf.int64: + cast_output[key] = tf.cast(val, tf.int32) + elif isinstance(val, np.ndarray) and val.dtype == np.int64: + cast_output[key] = val.astype(np.int32) else: cast_output[key] = val @@ -1115,7 +1115,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu `Dict[str, tf.Tensor]`: The dummy inputs. """ return { - "input_ids": tf.constant(DUMMY_INPUTS), + "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), } @property @@ -1155,12 +1155,25 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu """ return cls(config, **kwargs) + def eager_serving(self, inputs): + """ + Method used for serving the model. Intended not to be compiled with a tf.function decorator so that we can use + it to generate multiple signatures later. + + Args: + inputs (`Dict[str, tf.Tensor]`): + The input of the saved model as a dictionary of tensors. + """ + output = self.call(inputs) + + return self.serving_output(output) + @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } ] ) @@ -2253,7 +2266,17 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu if saved_model: if signatures is None: - signatures = self.serving + if any(spec.dtype == tf.int32 for spec in self.serving.input_signature[0].values()): + int64_spec = { + key: tf.TensorSpec( + shape=spec.shape, dtype=tf.int64 if spec.dtype == tf.int32 else spec.dtype, name=spec.name + ) + for key, spec in self.serving.input_signature[0].items() + } + int64_serving = tf.function(self.eager_serving, input_signature=[int64_spec]) + signatures = {"serving_default": self.serving, "int64_serving": int64_serving} + else: + signatures = self.serving saved_model_dir = os.path.join(save_directory, "saved_model", str(version)) self.save(saved_model_dir, include_optimizer=False, signatures=signatures) logger.info(f"Saved model created in {saved_model_dir}") diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index f3f1169554..0182fcfd3f 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -1396,7 +1396,7 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 5de35b5a7c..7dbe3384e2 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -467,11 +467,11 @@ class TFBartPretrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) + input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) + decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs @@ -479,10 +479,10 @@ class TFBartPretrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } ] ) diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 12c810d012..3ddc36fc7a 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -920,7 +920,7 @@ class TFBertPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -1726,7 +1726,7 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @@ -1809,9 +1809,9 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 50ceab3e02..0e9d39d7da 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -466,11 +466,11 @@ class TFBlenderbotPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) + input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) + decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index e6c066af12..6e7a6dd270 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -466,11 +466,11 @@ class TFBlenderbotSmallPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) + input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) + decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index 9e891e97fd..f75b97ed47 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -891,7 +891,7 @@ class TFCamembertPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -904,8 +904,8 @@ class TFCamembertPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -1390,7 +1390,7 @@ class TFCamembertForMultipleChoice(TFCamembertPreTrainedModel, TFMultipleChoiceL Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward( diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index 6edd09c2e1..e6646d4472 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -1107,8 +1107,8 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index b778b73c96..4f9d4af41f 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -1058,7 +1058,7 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward( @@ -1137,9 +1137,9 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 07ad3593a2..93c4be7624 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -434,8 +434,8 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -898,7 +898,7 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward( diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index c166cd0486..96ee761b81 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -376,8 +376,8 @@ class TFDPRPretrainedReader(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 8ed75b3bff..973860596e 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -613,7 +613,7 @@ class TFElectraPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -1375,7 +1375,7 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 5a8fb1ece1..a13d18806e 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -271,7 +271,7 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): `Dict[str, tf.Tensor]`: The dummy inputs. """ # Add `decoder_input_ids` because `self.decoder` requires it. - input_ids = tf.constant(DUMMY_INPUTS) + input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) dummy = {"input_ids": input_ids, "decoder_input_ids": input_ids} return dummy diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index 27a5c00ce8..0aaa2addd9 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -981,8 +981,8 @@ class TFEsmModel(TFEsmPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -1113,8 +1113,8 @@ class TFEsmForMaskedLM(TFEsmPreTrainedModel, TFMaskedLanguageModelingLoss): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -1247,8 +1247,8 @@ class TFEsmForSequenceClassification(TFEsmPreTrainedModel, TFSequenceClassificat @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), } ] ) @@ -1344,8 +1344,8 @@ class TFEsmForTokenClassification(TFEsmPreTrainedModel, TFTokenClassificationLos @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 7520d112ad..05b6922795 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -226,13 +226,13 @@ class TFFlaubertPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): # Sometimes Flaubert has language embeddings so don't forget to build them as well if needed - inputs_list = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]) - attns_list = tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]]) + inputs_list = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int32) + attns_list = tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32) if self.config.use_lang_emb and self.config.n_langs > 1: return { "input_ids": inputs_list, "attention_mask": attns_list, - "langs": tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]]), + "langs": tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32), } else: return {"input_ids": inputs_list, "attention_mask": attns_list} @@ -1169,12 +1169,12 @@ class TFFlaubertForMultipleChoice(TFFlaubertPreTrainedModel, TFMultipleChoiceLos # Sometimes Flaubert has language embeddings so don't forget to build them as well if needed if self.config.use_lang_emb and self.config.n_langs > 1: return { - "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), - "langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), + "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), + "langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), } else: return { - "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), + "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), } @unpack_inputs diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 2ec5debbf0..6e5b10fa43 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -1446,7 +1446,7 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @@ -1521,9 +1521,9 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index a64f0f0ff5..5f6f07a435 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -545,7 +545,7 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -558,8 +558,8 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index e70785c91e..c25b930973 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -531,14 +531,14 @@ class TFGPTJPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} return dummy @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 5ccd6c1982..551e7a5134 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -1640,8 +1640,8 @@ class TFGroupViTTextModel(TFGroupViTPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -1828,9 +1828,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), "pixel_values": tf.TensorSpec((None, None, None, None), tf.float64, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index ec7458a7eb..e9cf9adb76 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -1312,8 +1312,8 @@ class TFHubertPreTrainedModel(TFPreTrainedModel): input_signature=[ { "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index c85989913e..242d8d3983 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -998,10 +998,10 @@ class TFLayoutLMv3PreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "bbox": tf.TensorSpec((None, None, 4), tf.int64, name="bbox"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "bbox": tf.TensorSpec((None, None, 4), tf.int32, name="bbox"), "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 76fd65eada..78e98562ac 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1323,10 +1323,10 @@ class TFLEDPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): - input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0]], dtype=tf.int64) + input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0]], dtype=tf.int32) # make sure global layers are initialized - attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0]], dtype=tf.int64) - global_attention_mask = tf.convert_to_tensor([[0, 0, 0, 0, 1], [0, 0, 1, 0, 0]], dtype=tf.int64) + attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0]], dtype=tf.int32) + global_attention_mask = tf.convert_to_tensor([[0, 0, 0, 0, 1], [0, 0, 1, 0, 0]], dtype=tf.int32) dummy_inputs = { "input_ids": input_ids, "attention_mask": attention_mask, @@ -1338,10 +1338,10 @@ class TFLEDPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } ] ) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 489bb113f9..7c929f353d 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -1884,14 +1884,14 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): - input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int64) + input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int32) # make sure global layers are initialized - attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int64) + attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32) global_attention_mask = tf.convert_to_tensor( - [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int64 + [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int32 ) global_attention_mask = tf.convert_to_tensor( - [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int64 + [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int32 ) return { "input_ids": input_ids, @@ -1902,8 +1902,8 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -2497,9 +2497,9 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic @property def dummy_inputs(self): - input_ids = tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int64) + input_ids = tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32) # make sure global layers are initialized - global_attention_mask = tf.convert_to_tensor([[[0, 0, 0, 1], [0, 0, 0, 1]]] * 2, dtype=tf.int64) + global_attention_mask = tf.convert_to_tensor([[[0, 0, 0, 1], [0, 0, 0, 1]]] * 2, dtype=tf.int32) return {"input_ids": input_ids, "global_attention_mask": global_attention_mask} @unpack_inputs @@ -2591,8 +2591,8 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index b4f0589eed..e4c27f3b3c 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -655,7 +655,7 @@ class TFLxmertMainLayer(tf.keras.layers.Layer): """ batch_size = 2 num_visual_features = 10 - input_ids = tf.constant([[3, 5, 6], [2, 3, 4]]) + input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32) visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim)) visual_pos = tf.random.uniform((batch_size, num_visual_features, 4)) @@ -817,12 +817,12 @@ class TFLxmertPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), "visual_feats": tf.TensorSpec((None, None, None), tf.float32, name="visual_feats"), "visual_pos": tf.TensorSpec((None, None, None), tf.float32, name="visual_pos"), - "visual_attention_mask": tf.TensorSpec((None, None), tf.int64, name="visual_attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"), + "visual_attention_mask": tf.TensorSpec((None, None), tf.int32, name="visual_attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } ] ) @@ -1246,7 +1246,7 @@ class TFLxmertForPreTraining(TFLxmertPreTrainedModel): """ batch_size = 2 num_visual_features = 10 - input_ids = tf.constant([[3, 5, 6], [2, 3, 4]]) + input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32) visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim)) visual_pos = tf.random.uniform((batch_size, num_visual_features, 4)) diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index b040647fb6..a3c8fe62fb 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -507,7 +507,7 @@ class TFMarianPreTrainedModel(TFPreTrainedModel): decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 16106ad30b..05e7a23076 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -474,7 +474,7 @@ class TFMBartPreTrainedModel(TFPreTrainedModel): decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index f6138b7ce5..38f7f4474c 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -1549,7 +1549,7 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward( diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 5aca157571..4bc39ff0a2 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -79,8 +79,8 @@ class TFMPNetPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -992,7 +992,7 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss): Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 74aa798dfa..6d848947a0 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -379,8 +379,8 @@ class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 43cc952387..7e92177f6c 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -415,9 +415,9 @@ class TFOPTPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) + input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) dummy_inputs = { - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs @@ -425,8 +425,8 @@ class TFOPTPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index f89df8d95d..3d40ae661f 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -505,11 +505,11 @@ class TFPegasusPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) + input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) + decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) dummy_inputs = { "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), "input_ids": input_ids, } return dummy_inputs diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index ce1c45d479..3ca26dd670 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -830,7 +830,7 @@ class TFRemBertPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -1361,7 +1361,7 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss) Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @@ -1445,9 +1445,9 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss) @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 4b34cadbf1..9488412699 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -795,7 +795,7 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -808,8 +808,8 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) @@ -1436,7 +1436,7 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss) Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 437aca26a0..1ca96b5856 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -1141,7 +1141,7 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward( @@ -1221,9 +1221,9 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 364b515024..da25fa5828 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -595,9 +595,9 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel): input_signature=[ { "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } ] ) diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index cc269bcdde..ff20ff1d17 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -878,8 +878,8 @@ class TFT5PreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): - inputs = tf.constant(DUMMY_INPUTS) - input_mask = tf.constant(DUMMY_MASK) + inputs = tf.constant(DUMMY_INPUTS, dtype=tf.int32) + input_mask = tf.constant(DUMMY_MASK, dtype=tf.int32) dummy_inputs = { "input_ids": inputs, "decoder_input_ids": inputs, @@ -890,10 +890,10 @@ class TFT5PreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } ] ) @@ -1575,7 +1575,7 @@ class TFT5EncoderModel(TFT5PreTrainedModel): @property def dummy_inputs(self): - return {"input_ids": tf.constant(DUMMY_INPUTS)} + return {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} def get_encoder(self): return self.encoder diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 48c26a138d..ea379a039d 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -865,9 +865,9 @@ class TFTapasPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index e1e23773a2..c3da50d820 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -687,7 +687,7 @@ class TFTransfoXLPreTrainedModel(TFPreTrainedModel): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), } ] ) diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 18455d5a5c..4133dcba67 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -261,7 +261,7 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - decoder_input_ids = tf.constant(DUMMY_INPUTS) + decoder_input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) batch_size, seq_len = decoder_input_ids.shape VISION_DUMMY_INPUTS = tf.random.uniform( diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index 6fee92e672..58110b5120 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -1345,8 +1345,8 @@ class TFWav2Vec2PreTrainedModel(TFPreTrainedModel): input_signature=[ { "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int64, name="token_type_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 03a7ca2737..5e684a72e3 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -474,15 +474,15 @@ class TFWhisperPreTrainedModel(TFPreTrainedModel): self.main_input_name: tf.random.uniform( [2, self.config.num_mel_bins, self.config.max_source_positions * 2 - 1], dtype=tf.float32 ), - "decoder_input_ids": tf.constant([[2, 3]], dtype=tf.int64), + "decoder_input_ids": tf.constant([[2, 3]], dtype=tf.int32), } @tf.function( input_signature=[ { "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int64, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int64, name="decoder_attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), } ] ) diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 884470dfe4..95e9d22840 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -639,15 +639,15 @@ class TFXGLMPreTrainedModel(TFPreTrainedModel): input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) dummy_inputs = { "input_ids": input_ids, - "attention_mask": tf.math.not_equal(input_ids, pad_token), + "attention_mask": tf.cast(input_ids != pad_token, tf.int32), } return dummy_inputs @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int64, name="attention_mask"), + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), } ] ) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index f6988df94d..f910ff2fde 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -533,13 +533,13 @@ class TFXLMPreTrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): # Sometimes XLM has language embeddings so don't forget to build them as well if needed - inputs_list = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]) - attns_list = tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]]) + inputs_list = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int32) + attns_list = tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32) if self.config.use_lang_emb and self.config.n_langs > 1: return { "input_ids": inputs_list, "attention_mask": attns_list, - "langs": tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]]), + "langs": tf.constant([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32), } else: return {"input_ids": inputs_list, "attention_mask": attns_list} @@ -1006,12 +1006,12 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): # Sometimes XLM has language embeddings so don't forget to build them as well if needed if self.config.use_lang_emb and self.config.n_langs > 1: return { - "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), - "langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), + "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), + "langs": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), } else: return { - "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS), + "input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32), } @unpack_inputs diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 886d956f0b..7158f3606c 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -1486,7 +1486,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} @unpack_inputs @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @@ -1573,9 +1573,9 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): @tf.function( input_signature=[ { - "input_ids": tf.TensorSpec((None, None, None), tf.int64, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int64, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int64, name="token_type_ids"), + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), } ] ) diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index 3e9802e205..95770c8269 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -821,7 +821,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel): Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS)} + dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int64)} # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized if self.config.add_cross_attention: batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape @@ -1365,7 +1365,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c Returns: tf.Tensor with dummy inputs """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS)} + return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int64)} @unpack_inputs @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 15d4357f0b..679776dd8c 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -1643,7 +1643,7 @@ class TFModelTesterMixin: if metrics: self.assertTrue(len(accuracy1) == len(accuracy3) > 0, "Missing metrics!") - def test_int64_inputs(self): + def test_int_support(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: prepared_for_class = self._prepare_for_class( @@ -1662,6 +1662,26 @@ class TFModelTesterMixin: } model = model_class(config) model(**prepared_for_class) # No assertion, we're just checking this doesn't throw an error + int32_prepared_for_class = { + key: tf.cast(tensor, tf.int32) if isinstance(tensor, tf.Tensor) and tensor.dtype.is_integer else tensor + for key, tensor in prepared_for_class.items() + } + model(**int32_prepared_for_class) # No assertion, we're just checking this doesn't throw an error + + # After testing that the model accepts all int inputs, confirm that its dummies are int32 + for key, tensor in model.dummy_inputs.items(): + self.assertTrue(isinstance(tensor, tf.Tensor), "Dummy inputs should be tf.Tensor!") + if tensor.dtype.is_integer: + self.assertTrue(tensor.dtype == tf.int32, "Integer dummy inputs should be tf.int32!") + + # Also confirm that the serving sig uses int32 + if hasattr(model, "serving"): + serving_sig = model.serving.input_signature + for key, tensor_spec in serving_sig[0].items(): + if tensor_spec.dtype.is_integer: + self.assertTrue( + tensor_spec.dtype == tf.int32, "Serving signatures should use tf.int32 for ints!" + ) def test_generate_with_headmasking(self): attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"] @@ -2005,9 +2025,9 @@ class UtilsFunctionsTest(unittest.TestCase): return pixel_values, output_attentions, output_hidden_states, return_dict dummy_model = DummyModel() - input_ids = tf.constant([0, 1, 2, 3], dtype=tf.int64) - past_key_values = tf.constant([4, 5, 6, 7], dtype=tf.int64) - pixel_values = tf.constant([8, 9, 10, 11], dtype=tf.int64) + input_ids = tf.constant([0, 1, 2, 3], dtype=tf.int32) + past_key_values = tf.constant([4, 5, 6, 7], dtype=tf.int32) + pixel_values = tf.constant([8, 9, 10, 11], dtype=tf.int32) # test case 1: Pass inputs as keyword arguments; Booleans are inherited from the config. output = dummy_model.call(input_ids=input_ids, past_key_values=past_key_values) diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py index 0863528708..3bc015febd 100644 --- a/tests/utils/test_modeling_tf_core.py +++ b/tests/utils/test_modeling_tf_core.py @@ -218,6 +218,11 @@ class TFCoreModelTesterMixin: model = model_class(config) num_out = len(model(class_inputs_dict)) + for key in class_inputs_dict.keys(): + # Check it's a tensor, in case the inputs dict has some bools in it too + if isinstance(class_inputs_dict[key], tf.Tensor) and class_inputs_dict[key].dtype.is_integer: + class_inputs_dict[key] = tf.cast(class_inputs_dict[key], tf.int32) + with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname, saved_model=True) saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")