From f8b25744163d54612ae12203160e03623ddc9c09 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 24 May 2023 13:52:52 +0100 Subject: [PATCH] Better TF docstring types (#23477) * Rework TF type hints to use | None instead of Optional[] for tf.Tensor * Rework TF type hints to use | None instead of Optional[] for tf.Tensor * Don't forget the imports * Add the imports to tests too * make fixup * Refactor tests that depended on get_type_hints * Better test refactor * Fix an old hidden bug in the test_keras_fit input creation code * Fix for the Deit tests --- src/transformers/modeling_tf_outputs.py | 190 ++++++++--------- src/transformers/modeling_tf_utils.py | 4 +- .../models/albert/modeling_tf_albert.py | 119 ++++++----- .../models/bart/modeling_tf_bart.py | 136 ++++++------ .../models/bert/modeling_tf_bert.py | 169 +++++++-------- .../blenderbot/modeling_tf_blenderbot.py | 72 ++++--- .../modeling_tf_blenderbot_small.py | 74 +++---- .../models/blip/modeling_tf_blip.py | 92 ++++---- .../models/blip/modeling_tf_blip_text.py | 13 +- .../models/camembert/modeling_tf_camembert.py | 137 ++++++------ .../models/clip/modeling_tf_clip.py | 52 ++--- .../models/convbert/modeling_tf_convbert.py | 78 +++---- .../models/convnext/modeling_tf_convnext.py | 10 +- .../models/ctrl/modeling_tf_ctrl.py | 55 ++--- .../models/cvt/modeling_tf_cvt.py | 12 +- .../data2vec/modeling_tf_data2vec_vision.py | 35 +-- .../models/deberta/modeling_tf_deberta.py | 72 ++++--- .../deberta_v2/modeling_tf_deberta_v2.py | 72 ++++--- .../models/deit/modeling_tf_deit.py | 36 ++-- .../distilbert/modeling_tf_distilbert.py | 63 +++--- .../models/dpr/modeling_tf_dpr.py | 54 ++--- .../models/electra/modeling_tf_electra.py | 135 ++++++------ .../modeling_tf_encoder_decoder.py | 21 +- .../models/esm/modeling_tf_esm.py | 81 +++---- .../models/flaubert/modeling_tf_flaubert.py | 129 ++++++------ .../models/funnel/modeling_tf_funnel.py | 83 ++++---- .../models/gpt2/modeling_tf_gpt2.py | 86 ++++---- .../models/gptj/modeling_tf_gptj.py | 76 +++---- .../models/groupvit/modeling_tf_groupvit.py | 56 ++--- .../models/hubert/modeling_tf_hubert.py | 59 +++--- .../models/layoutlm/modeling_tf_layoutlm.py | 117 +++++----- .../layoutlmv3/modeling_tf_layoutlmv3.py | 133 ++++++------ .../models/led/modeling_tf_led.py | 84 ++++---- .../longformer/modeling_tf_longformer.py | 149 ++++++------- .../models/lxmert/modeling_tf_lxmert.py | 51 ++--- .../models/marian/modeling_tf_marian.py | 122 +++++------ .../models/mbart/modeling_tf_mbart.py | 110 +++++----- .../mobilebert/modeling_tf_mobilebert.py | 123 +++++------ .../models/mobilevit/modeling_tf_mobilevit.py | 14 +- .../models/mpnet/modeling_tf_mpnet.py | 56 ++--- .../models/openai/modeling_tf_openai.py | 72 ++++--- .../models/opt/modeling_tf_opt.py | 52 ++--- .../models/pegasus/modeling_tf_pegasus.py | 112 +++++----- .../models/rag/modeling_tf_rag.py | 121 +++++------ .../models/rembert/modeling_tf_rembert.py | 130 ++++++------ .../models/roberta/modeling_tf_roberta.py | 137 ++++++------ .../modeling_tf_roberta_prelayernorm.py | 137 ++++++------ .../models/roformer/modeling_tf_roformer.py | 96 ++++----- .../models/sam/modeling_tf_sam.py | 45 ++-- .../models/segformer/modeling_tf_segformer.py | 9 +- .../modeling_tf_speech_to_text.py | 62 +++--- .../models/swin/modeling_tf_swin.py | 72 ++++--- src/transformers/models/t5/modeling_tf_t5.py | 49 +++-- .../models/tapas/modeling_tf_tapas.py | 99 ++++----- .../transfo_xl/modeling_tf_transfo_xl.py | 54 ++--- .../modeling_tf_vision_encoder_decoder.py | 12 +- .../modeling_tf_vision_text_dual_encoder.py | 12 +- .../models/vit/modeling_tf_vit.py | 16 +- .../models/vit_mae/modeling_tf_vit_mae.py | 31 +-- .../models/wav2vec2/modeling_tf_wav2vec2.py | 63 +++--- .../models/whisper/modeling_tf_whisper.py | 54 ++--- .../models/xglm/modeling_tf_xglm.py | 80 +++---- .../models/xlm/modeling_tf_xlm.py | 97 ++++----- .../xlm_roberta/modeling_tf_xlm_roberta.py | 137 ++++++------ .../models/xlnet/modeling_tf_xlnet.py | 199 +++++++++--------- ...tf_{{cookiecutter.lowercase_modelname}}.py | 156 +++++++------- tests/generation/test_tf_logits_process.py | 2 + tests/generation/test_tf_utils.py | 2 + .../models/albert/test_modeling_tf_albert.py | 2 + tests/models/auto/test_modeling_tf_auto.py | 2 + tests/models/auto/test_modeling_tf_pytorch.py | 2 + tests/models/bart/test_modeling_tf_bart.py | 2 + tests/models/bert/test_modeling_tf_bert.py | 2 + .../blenderbot/test_modeling_tf_blenderbot.py | 2 + .../test_modeling_tf_blenderbot_small.py | 2 + tests/models/blip/test_modeling_tf_blip.py | 2 + .../models/blip/test_modeling_tf_blip_text.py | 2 + tests/models/bort/test_modeling_tf_bort.py | 2 + .../camembert/test_modeling_tf_camembert.py | 2 + tests/models/clip/test_modeling_tf_clip.py | 2 + .../convbert/test_modeling_tf_convbert.py | 2 + .../convnext/test_modeling_tf_convnext.py | 2 + tests/models/ctrl/test_modeling_tf_ctrl.py | 2 + tests/models/cvt/test_modeling_tf_cvt.py | 2 + .../test_modeling_tf_data2vec_vision.py | 2 + .../deberta/test_modeling_tf_deberta.py | 2 + .../deberta_v2/test_modeling_tf_deberta_v2.py | 2 + tests/models/deit/test_modeling_tf_deit.py | 4 +- .../distilbert/test_modeling_tf_distilbert.py | 2 + tests/models/dpr/test_modeling_tf_dpr.py | 2 + .../electra/test_modeling_tf_electra.py | 2 + .../test_modeling_tf_encoder_decoder.py | 2 + tests/models/esm/test_modeling_tf_esm.py | 2 + .../flaubert/test_modeling_tf_flaubert.py | 2 + .../models/funnel/test_modeling_tf_funnel.py | 2 + tests/models/gpt2/test_modeling_tf_gpt2.py | 2 + tests/models/gptj/test_modeling_tf_gptj.py | 2 + .../groupvit/test_modeling_tf_groupvit.py | 2 + .../models/hubert/test_modeling_tf_hubert.py | 2 + .../layoutlm/test_modeling_tf_layoutlm.py | 2 + .../layoutlmv3/test_modeling_tf_layoutlmv3.py | 2 + tests/models/led/test_modeling_tf_led.py | 2 + .../longformer/test_modeling_tf_longformer.py | 2 + .../models/lxmert/test_modeling_tf_lxmert.py | 2 + .../models/marian/test_modeling_tf_marian.py | 2 + tests/models/mbart/test_modeling_tf_mbart.py | 2 + .../mobilebert/test_modeling_tf_mobilebert.py | 2 + .../mobilevit/test_modeling_tf_mobilevit.py | 2 + tests/models/mpnet/test_modeling_tf_mpnet.py | 2 + tests/models/mt5/test_modeling_tf_mt5.py | 2 + .../models/openai/test_modeling_tf_openai.py | 2 + tests/models/opt/test_modeling_tf_opt.py | 2 + .../pegasus/test_modeling_tf_pegasus.py | 2 + tests/models/rag/test_modeling_tf_rag.py | 2 + .../models/regnet/test_modeling_tf_regnet.py | 2 + .../rembert/test_modeling_tf_rembert.py | 2 + .../models/resnet/test_modeling_tf_resnet.py | 2 + .../roberta/test_modeling_tf_roberta.py | 2 + .../test_modeling_tf_roberta_prelayernorm.py | 2 + .../roformer/test_modeling_tf_roformer.py | 2 + tests/models/sam/test_modeling_tf_sam.py | 2 + .../segformer/test_modeling_tf_segformer.py | 2 + .../test_modeling_tf_speech_to_text.py | 2 + tests/models/swin/test_modeling_tf_swin.py | 2 + tests/models/t5/test_modeling_tf_t5.py | 2 + tests/models/tapas/test_modeling_tf_tapas.py | 2 + .../transfo_xl/test_modeling_tf_transfo_xl.py | 2 + ...test_modeling_tf_vision_encoder_decoder.py | 2 + ...st_modeling_tf_vision_text_dual_encoder.py | 2 + tests/models/vit/test_modeling_tf_vit.py | 2 + .../vit_mae/test_modeling_tf_vit_mae.py | 2 + .../wav2vec2/test_modeling_tf_wav2vec2.py | 2 + .../whisper/test_modeling_tf_whisper.py | 2 + tests/models/xglm/test_modeling_tf_xglm.py | 2 + tests/models/xlm/test_modeling_tf_xlm.py | 2 + .../test_modeling_tf_xlm_roberta.py | 2 + tests/models/xlnet/test_modeling_tf_xlnet.py | 2 + tests/test_modeling_tf_common.py | 48 ++--- tests/utils/test_modeling_tf_core.py | 2 + 139 files changed, 2907 insertions(+), 2621 deletions(-) diff --git a/src/transformers/modeling_tf_outputs.py b/src/transformers/modeling_tf_outputs.py index f8148b1695..357c34bc1f 100644 --- a/src/transformers/modeling_tf_outputs.py +++ b/src/transformers/modeling_tf_outputs.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import List, Optional, Tuple @@ -43,8 +45,8 @@ class TFBaseModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -96,8 +98,8 @@ class TFBaseModelOutputWithPooling(ModelOutput): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -164,10 +166,10 @@ class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -201,9 +203,9 @@ class TFBaseModelOutputWithPast(ModelOutput): """ last_hidden_state: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -234,9 +236,9 @@ class TFBaseModelOutputWithCrossAttentions(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -276,10 +278,10 @@ class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput): """ last_hidden_state: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -333,13 +335,13 @@ class TFSeq2SeqModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -365,10 +367,10 @@ class TFCausalLMOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -400,11 +402,11 @@ class TFCausalLMOutputWithPast(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -442,12 +444,12 @@ class TFCausalLMOutputWithCrossAttentions(ModelOutput): `past_key_values` input) to speed up sequential decoding. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -473,10 +475,10 @@ class TFMaskedLMOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -527,15 +529,15 @@ class TFSeq2SeqLMOutput(ModelOutput): self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -562,10 +564,10 @@ class TFNextSentencePredictorOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -591,10 +593,10 @@ class TFSequenceClassifierOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -642,15 +644,15 @@ class TFSeq2SeqSequenceClassifierOutput(ModelOutput): self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -684,10 +686,10 @@ class TFSemanticSegmenterOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -716,9 +718,9 @@ class TFSemanticSegmenterOutputWithNoAttention(ModelOutput): Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -742,10 +744,10 @@ class TFImageClassifierOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -773,10 +775,10 @@ class TFMultipleChoiceModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -802,10 +804,10 @@ class TFTokenClassifierOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -833,11 +835,11 @@ class TFQuestionAnsweringModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -884,15 +886,15 @@ class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput): self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -924,11 +926,11 @@ class TFSequenceClassifierOutputWithPast(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -947,7 +949,7 @@ class TFImageClassifierOutputWithNoAttention(ModelOutput): feature maps) of the model at the output of each stage. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None hidden_states: Optional[Tuple[tf.Tensor, ...]] = None @@ -974,10 +976,10 @@ class TFMaskedImageModelingOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None reconstruction: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @property def logits(self): diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c58926f476..c2b0485b5f 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -15,6 +15,8 @@ # limitations under the License. """TF general model utils.""" +from __future__ import annotations + import functools import gc import inspect @@ -1154,7 +1156,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu """ return cls(config, **kwargs) - def get_head_mask(self, head_mask: Optional[tf.Tensor], num_hidden_layers: int) -> tf.Tensor: + def get_head_mask(self, head_mask: tf.Tensor | None, num_hidden_layers: int) -> tf.Tensor: """ Prepare the head mask if needed. diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index c7f76b175b..57e2414e72 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 ALBERT model.""" + +from __future__ import annotations + import math from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -561,12 +564,12 @@ class TFAlbertMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -676,8 +679,8 @@ class TFAlbertForPreTrainingOutput(ModelOutput): loss: tf.Tensor = None prediction_logits: tf.Tensor = None sop_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None ALBERT_START_DOCSTRING = r""" @@ -797,12 +800,12 @@ class TFAlbertModel(TFAlbertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -863,17 +866,17 @@ class TFAlbertForPreTraining(TFAlbertPreTrainedModel, TFAlbertPreTrainingLoss): @replace_return_docstrings(output_type=TFAlbertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - sentence_order_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, + sentence_order_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFAlbertForPreTrainingOutput, Tuple[tf.Tensor]]: r""" @@ -979,16 +982,16 @@ class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss) @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1097,16 +1100,16 @@ class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel, TFSequenceClass ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1189,16 +1192,16 @@ class TFAlbertForTokenClassification(TFAlbertPreTrainedModel, TFTokenClassificat ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1276,17 +1279,17 @@ class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel, TFQuestionAnsweringL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1386,16 +1389,16 @@ class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 39537b88bf..5690e022ad 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -15,6 +15,8 @@ """ TF 2.0 Bart model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -131,7 +133,7 @@ class TFBartLearnedPositionalEmbedding(tf.keras.layers.Embedding): self, input_shape: Optional[tf.TensorShape] = None, past_key_values_length: int = 0, - position_ids: Optional[tf.Tensor] = None, + position_ids: tf.Tensor | None = None, ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -180,12 +182,12 @@ class TFBartAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -314,8 +316,8 @@ class TFBartEncoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]], - layer_head_mask: Optional[tf.Tensor], + attention_mask: np.ndarray | tf.Tensor | None, + layer_head_mask: tf.Tensor | None, training: Optional[bool] = False, ) -> tf.Tensor: """ @@ -383,11 +385,11 @@ class TFBartDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: @@ -700,10 +702,10 @@ class TFBartEncoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -851,14 +853,14 @@ class TFBartDecoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1073,18 +1075,18 @@ class TFBartMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1187,18 +1189,18 @@ class TFBartModel(TFBartPretrainedModel): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1311,23 +1313,23 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel, TFCausalLanguageMode @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: r""" @@ -1481,23 +1483,23 @@ class TFBartForSequenceClassification(TFBartPretrainedModel, TFSequenceClassific @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSeq2SeqSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index 50ff7f2ddd..df78d03a00 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 BERT model.""" + +from __future__ import annotations + import math import warnings from dataclasses import dataclass @@ -452,9 +455,9 @@ class TFBertLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -530,9 +533,9 @@ class TFBertEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -735,14 +738,14 @@ class TFBertMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -943,7 +946,7 @@ class TFBertForPreTrainingOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None prediction_logits: tf.Tensor = None seq_relationship_logits: tf.Tensor = None hidden_states: Optional[Union[Tuple[tf.Tensor], tf.Tensor]] = None @@ -1067,14 +1070,14 @@ class TFBertModel(TFBertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1175,17 +1178,17 @@ class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss): @replace_return_docstrings(output_type=TFBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, + next_sentence_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFBertForPreTrainingOutput, Tuple[tf.Tensor]]: r""" @@ -1304,16 +1307,16 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1401,20 +1404,20 @@ class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, **kwargs, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: @@ -1513,16 +1516,16 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel, TFNextSentencePredi @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + next_sentence_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFNextSentencePredictorOutput, Tuple[tf.Tensor]]: r""" @@ -1621,16 +1624,16 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel, TFSequenceClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1714,16 +1717,16 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1848,16 +1851,16 @@ class TFBertForTokenClassification(TFBertPreTrainedModel, TFTokenClassificationL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1941,17 +1944,17 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index ee5755c203..66f00d89f8 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -15,6 +15,8 @@ """ TF 2.0 Blenderbot model.""" +from __future__ import annotations + import os import random import warnings @@ -126,7 +128,7 @@ class TFBlenderbotLearnedPositionalEmbedding(tf.keras.layers.Embedding): super().__init__(num_embeddings, embedding_dim, **kwargs) def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -175,12 +177,12 @@ class TFBlenderbotAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -380,12 +382,12 @@ class TFBlenderbotDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1183,18 +1185,18 @@ class TFBlenderbotModel(TFBlenderbotPreTrainedModel): ) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1327,23 +1329,23 @@ class TFBlenderbotForConditionalGeneration(TFBlenderbotPreTrainedModel, TFCausal @add_end_docstrings(BLENDERBOT_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index e170085e91..541024470d 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -15,6 +15,8 @@ """ TF 2.0 BlenderbotSmall model.""" +from __future__ import annotations + import random from typing import List, Optional, Tuple, Union @@ -126,7 +128,7 @@ class TFBlenderbotSmallLearnedPositionalEmbedding(tf.keras.layers.Embedding): super().__init__(num_embeddings, embedding_dim, **kwargs) def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -175,12 +177,12 @@ class TFBlenderbotSmallAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -310,8 +312,8 @@ class TFBlenderbotSmallEncoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]], - layer_head_mask: Optional[tf.Tensor], + attention_mask: np.ndarray | tf.Tensor | None, + layer_head_mask: tf.Tensor | None, training: Optional[bool] = False, ) -> tf.Tensor: """ @@ -380,11 +382,11 @@ class TFBlenderbotSmallDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: @@ -1175,18 +1177,18 @@ class TFBlenderbotSmallModel(TFBlenderbotSmallPreTrainedModel): ) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1303,23 +1305,23 @@ class TFBlenderbotSmallForConditionalGeneration(TFBlenderbotSmallPreTrainedModel @add_end_docstrings(BLENDERBOT_SMALL_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: Optional[List[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: List[tf.Tensor] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple[tf.Tensor], TFSeq2SeqLMOutput]: r""" diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index ae32c2f32a..95269e4351 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -14,6 +14,8 @@ # limitations under the License. """ TensorFlow BLIP model.""" +from __future__ import annotations + from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -102,12 +104,12 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput): heads.` """ - loss: Optional[Tuple[tf.Tensor]] = None - decoder_logits: Optional[Tuple[tf.Tensor]] = None - image_embeds: Optional[tf.Tensor] = None + loss: Tuple[tf.Tensor] | None = None + decoder_logits: Tuple[tf.Tensor] | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -136,11 +138,11 @@ class TFBlipTextVisionModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None - image_embeds: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -176,14 +178,14 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput): The question embeddings obtained by the text projection layer. """ - itm_score: Optional[tf.Tensor] = None - loss: Optional[tf.Tensor] = None - image_embeds: Optional[tf.Tensor] = None + itm_score: tf.Tensor | None = None + loss: tf.Tensor | None = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_pooler_output: Optional[tf.Tensor] = None - attentions: Optional[Tuple[tf.Tensor]] = None - question_embeds: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + vision_pooler_output: tf.Tensor | None = None + attentions: Tuple[tf.Tensor] | None = None + question_embeds: Tuple[tf.Tensor] | None = None @dataclass @@ -208,7 +210,7 @@ class TFBlipOutput(ModelOutput): The output of the [`BlipVisionModel`]. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None text_embeds: tf.Tensor = None @@ -359,10 +361,10 @@ class TFBlipAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: Optional[bool] = None, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[Tuple[tf.Tensor]]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None, Tuple[tf.Tensor] | None]: """Input shape: Batch x Time x Channel""" bsz, tgt_len, embed_dim = shape_list(hidden_states) @@ -573,7 +575,7 @@ class TFBlipEncoder(tf.keras.layers.Layer): def call( self, inputs_embeds, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -694,7 +696,7 @@ class TFBlipVisionModel(TFBlipPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=BlipVisionConfig) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -799,10 +801,10 @@ class TFBlipMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -930,10 +932,10 @@ class TFBlipModel(TFBlipPreTrainedModel): @replace_return_docstrings(output_type=TFBlipOutput, config_class=BlipConfig) def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -980,9 +982,9 @@ class TFBlipModel(TFBlipPreTrainedModel): @add_start_docstrings_to_model_forward(BLIP_TEXT_INPUTS_DOCSTRING) def get_text_features( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_dict: Optional[bool] = None, ) -> tf.Tensor: r""" @@ -1018,7 +1020,7 @@ class TFBlipModel(TFBlipPreTrainedModel): @add_start_docstrings_to_model_forward(BLIP_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, return_dict: Optional[bool] = None, ) -> tf.Tensor: r""" @@ -1128,11 +1130,11 @@ class TFBlipForConditionalGeneration(TFBlipPreTrainedModel): def call( self, pixel_values: tf.Tensor, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: Optional[bool] = None, ) -> Union[Tuple, TFBlipForConditionalGenerationModelOutput]: @@ -1197,8 +1199,8 @@ class TFBlipForConditionalGeneration(TFBlipPreTrainedModel): def generate( self, pixel_values: tf.Tensor, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, **generate_kwargs, ) -> tf.Tensor: r""" @@ -1342,13 +1344,13 @@ class TFBlipForQuestionAnswering(TFBlipPreTrainedModel): self, input_ids: tf.Tensor, pixel_values: tf.Tensor, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, foutput_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: Optional[bool] = None, ) -> Union[Tuple, TFBlipTextVisionModelOutput]: @@ -1451,7 +1453,7 @@ class TFBlipForQuestionAnswering(TFBlipPreTrainedModel): self, input_ids: tf.Tensor, pixel_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, **generate_kwargs, ) -> tf.Tensor: r""" @@ -1624,9 +1626,9 @@ class TFBlipForImageTextRetrieval(TFBlipPreTrainedModel): def call( self, input_ids: tf.Tensor, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, use_itm_head: Optional[bool] = True, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index 02f592c259..bff8122337 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. + +from __future__ import annotations + import math from typing import Dict, Optional, Tuple @@ -277,11 +280,11 @@ class TFBlipTextAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, output_attentions: Optional[bool] = False, training: Optional[bool] = None, ): diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index c9e4c98c14..980462f4be 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 CamemBERT model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -526,9 +529,9 @@ class TFCamembertLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -605,9 +608,9 @@ class TFCamembertEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -705,14 +708,14 @@ class TFCamembertMainLayer(tf.keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -922,14 +925,14 @@ class TFCamembertModel(TFCamembertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1085,16 +1088,16 @@ class TFCamembertForMaskedLM(TFCamembertPreTrainedModel, TFMaskedLanguageModelin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1199,16 +1202,16 @@ class TFCamembertForSequenceClassification(TFCamembertPreTrainedModel, TFSequenc ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1290,16 +1293,16 @@ class TFCamembertForTokenClassification(TFCamembertPreTrainedModel, TFTokenClass ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1387,16 +1390,16 @@ class TFCamembertForMultipleChoice(TFCamembertPreTrainedModel, TFMultipleChoiceL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1499,17 +1502,17 @@ class TFCamembertForQuestionAnswering(TFCamembertPreTrainedModel, TFQuestionAnsw ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1615,20 +1618,20 @@ class TFCamembertForCausalLM(TFCamembertPreTrainedModel, TFCausalLanguageModelin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index 7cf52500ae..9b7976f413 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -15,6 +15,8 @@ """ TF 2.0 CLIP model.""" +from __future__ import annotations + import math from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -111,7 +113,7 @@ class TFCLIPOutput(ModelOutput): The output of the [`TFCLIPVisionModel`]. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None text_embeds: tf.Tensor = None @@ -586,9 +588,9 @@ class TFCLIPTextMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -675,7 +677,7 @@ class TFCLIPVisionMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -751,9 +753,9 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): @unpack_inputs def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -785,7 +787,7 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): @unpack_inputs def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -810,10 +812,10 @@ class TFCLIPMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1050,9 +1052,9 @@ class TFCLIPTextModel(TFCLIPPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=CLIPTextConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1158,7 +1160,7 @@ class TFCLIPVisionModel(TFCLIPPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=CLIPVisionConfig) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1260,9 +1262,9 @@ class TFCLIPModel(TFCLIPPreTrainedModel): @add_start_docstrings_to_model_forward(CLIP_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1300,7 +1302,7 @@ class TFCLIPModel(TFCLIPPreTrainedModel): @add_start_docstrings_to_model_forward(CLIP_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1343,10 +1345,10 @@ class TFCLIPModel(TFCLIPPreTrainedModel): @replace_return_docstrings(output_type=TFCLIPOutput, config_class=CLIPConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index e853da7627..19c2d700dc 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -15,6 +15,8 @@ """ TF 2.0 ConvBERT model.""" +from __future__ import annotations + from typing import Optional, Tuple, Union import numpy as np @@ -742,12 +744,12 @@ class TFConvBertModel(TFConvBertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, attention_mask: Optional[Union[np.array, tf.Tensor]] = None, token_type_ids: Optional[Union[np.array, tf.Tensor]] = None, position_ids: Optional[Union[np.array, tf.Tensor]] = None, head_mask: Optional[Union[np.array, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -858,16 +860,16 @@ class TFConvBertForMaskedLM(TFConvBertPreTrainedModel, TFMaskedLanguageModelingL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMaskedLMOutput]: r""" @@ -965,16 +967,16 @@ class TFConvBertForSequenceClassification(TFConvBertPreTrainedModel, TFSequenceC ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" @@ -1057,16 +1059,16 @@ class TFConvBertForMultipleChoice(TFConvBertPreTrainedModel, TFMultipleChoiceLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMultipleChoiceModelOutput]: r""" @@ -1170,16 +1172,16 @@ class TFConvBertForTokenClassification(TFConvBertPreTrainedModel, TFTokenClassif ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFTokenClassifierOutput]: r""" @@ -1247,17 +1249,17 @@ class TFConvBertForQuestionAnswering(TFConvBertPreTrainedModel, TFQuestionAnswer ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[tf.Tensor] = None, - end_positions: Optional[tf.Tensor] = None, + start_positions: tf.Tensor | None = None, + end_positions: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFQuestionAnsweringModelOutput]: r""" diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index 00db1f0b78..f258abe24c 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -15,6 +15,8 @@ """ TF 2.0 ConvNext model.""" +from __future__ import annotations + from typing import Dict, Optional, Tuple, Union import numpy as np @@ -297,7 +299,7 @@ class TFConvNextMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -458,7 +460,7 @@ class TFConvNextModel(TFConvNextPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -543,10 +545,10 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index f4742b4e33..cddfd4a9e3 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 CTRL model.""" + +from __future__ import annotations + import warnings from typing import Optional, Tuple, Union @@ -256,13 +259,13 @@ class TFCTRLMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -532,13 +535,13 @@ class TFCTRLModel(TFCTRLPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -645,18 +648,18 @@ class TFCTRLLMHeadModel(TFCTRLPreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFCausalLMOutputWithPast]: r""" @@ -749,18 +752,18 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py index 6ad86071e4..3c80f53bfa 100644 --- a/src/transformers/models/cvt/modeling_tf_cvt.py +++ b/src/transformers/models/cvt/modeling_tf_cvt.py @@ -15,6 +15,8 @@ """ TF 2.0 Cvt model.""" +from __future__ import annotations + import collections.abc from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -75,7 +77,7 @@ class TFBaseModelOutputWithCLSToken(ModelOutput): last_hidden_state: tf.Tensor = None cls_token_value: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None class TFCvtDropPath(tf.keras.layers.Layer): @@ -668,7 +670,7 @@ class TFCvtMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -797,7 +799,7 @@ class TFCvtModel(TFCvtPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithCLSToken, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -880,8 +882,8 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification @replace_return_docstrings(output_type=TFImageClassifierOutputWithNoAttention, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 06a6f010dd..1085d6e48d 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF 2.0 Data2Vec Vision model.""" + +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -94,8 +97,8 @@ class TFData2VecVisionModelOutputWithPooling(TFBaseModelOutputWithPooling): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFData2VecVisionDropPath(tf.keras.layers.Layer): @@ -163,7 +166,7 @@ class TFData2VecVisionEmbeddings(tf.keras.layers.Layer): super().build(input_shape) - def call(self, pixel_values: tf.Tensor, bool_masked_pos: Optional[tf.Tensor] = None) -> tf.Tensor: + def call(self, pixel_values: tf.Tensor, bool_masked_pos: tf.Tensor | None = None) -> tf.Tensor: embeddings = self.patch_embeddings(pixel_values) batch_size, seq_len, projection_dim = shape_list(embeddings) @@ -609,7 +612,7 @@ class TFData2VecVisionEncoder(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -685,9 +688,9 @@ class TFData2VecVisionMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -899,9 +902,9 @@ class TFData2VecVisionModel(TFData2VecVisionPreTrainedModel): ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -966,12 +969,12 @@ class TFData2VecVisionForImageClassification(TFData2VecVisionPreTrainedModel, TF ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, tuple]: r""" @@ -1378,9 +1381,9 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel): @replace_return_docstrings(output_type=TFSemanticSegmenterOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index dcd0582777..7a04542618 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -15,6 +15,8 @@ """ TF 2.0 DeBERTa model.""" +from __future__ import annotations + import math from typing import Dict, Optional, Sequence, Tuple, Union @@ -922,11 +924,11 @@ class TFDebertaMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1092,11 +1094,11 @@ class TFDebertaModel(TFDebertaPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1149,15 +1151,15 @@ class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1233,15 +1235,15 @@ class TFDebertaForSequenceClassification(TFDebertaPreTrainedModel, TFSequenceCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1314,15 +1316,15 @@ class TFDebertaForTokenClassification(TFDebertaPreTrainedModel, TFTokenClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1390,16 +1392,16 @@ class TFDebertaForQuestionAnswering(TFDebertaPreTrainedModel, TFQuestionAnswerin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index b3c210352a..82b0a30c5a 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -15,6 +15,8 @@ """ TF 2.0 DeBERTa-v2 model.""" +from __future__ import annotations + from typing import Dict, Optional, Tuple, Union import numpy as np @@ -1014,11 +1016,11 @@ class TFDebertaV2MainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1186,11 +1188,11 @@ class TFDebertaV2Model(TFDebertaV2PreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1244,15 +1246,15 @@ class TFDebertaV2ForMaskedLM(TFDebertaV2PreTrainedModel, TFMaskedLanguageModelin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1329,15 +1331,15 @@ class TFDebertaV2ForSequenceClassification(TFDebertaV2PreTrainedModel, TFSequenc ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1411,15 +1413,15 @@ class TFDebertaV2ForTokenClassification(TFDebertaV2PreTrainedModel, TFTokenClass ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1488,16 +1490,16 @@ class TFDebertaV2ForQuestionAnswering(TFDebertaV2PreTrainedModel, TFQuestionAnsw ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py index a3d487021d..131939f5bc 100644 --- a/src/transformers/models/deit/modeling_tf_deit.py +++ b/src/transformers/models/deit/modeling_tf_deit.py @@ -15,6 +15,8 @@ """ TensorFlow DeiT model.""" +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -95,8 +97,8 @@ class TFDeiTForImageClassificationWithTeacherOutput(ModelOutput): logits: tf.Tensor = None cls_logits: tf.Tensor = None distillation_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFDeiTEmbeddings(tf.keras.layers.Layer): @@ -142,7 +144,7 @@ class TFDeiTEmbeddings(tf.keras.layers.Layer): super().build(input_shape) def call( - self, pixel_values: tf.Tensor, bool_masked_pos: Optional[tf.Tensor] = None, training: bool = False + self, pixel_values: tf.Tensor, bool_masked_pos: tf.Tensor | None = None, training: bool = False ) -> tf.Tensor: embeddings = self.patch_embeddings(pixel_values) batch_size, seq_length, _ = shape_list(embeddings) @@ -501,9 +503,9 @@ class TFDeiTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -658,9 +660,9 @@ class TFDeiTModel(TFDeiTPreTrainedModel): ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -768,9 +770,9 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel): @replace_return_docstrings(output_type=TFMaskedImageModelingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -898,9 +900,9 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati @replace_return_docstrings(output_type=TFImageClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1016,8 +1018,8 @@ class TFDeiTForImageClassificationWithTeacher(TFDeiTPreTrainedModel): ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 3013f4ca30..85a98c2a77 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -16,6 +16,9 @@ TF 2.0 DistilBERT model """ + +from __future__ import annotations + import warnings from typing import Optional, Tuple, Union @@ -538,10 +541,10 @@ class TFDistilBertModel(TFDistilBertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -639,14 +642,14 @@ class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel, TFMaskedLanguageModel ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -725,14 +728,14 @@ class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel, TFSeque ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -805,14 +808,14 @@ class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel, TFTokenCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -897,14 +900,14 @@ class TFDistilBertForMultipleChoice(TFDistilBertPreTrainedModel, TFMultipleChoic ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1004,15 +1007,15 @@ class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel, TFQuestionAn ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 565ad37b21..008e6a39fd 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -15,8 +15,10 @@ """ TensorFlow DPR model for Open Domain Question Answering.""" +from __future__ import annotations + from dataclasses import dataclass -from typing import Optional, Tuple, Union +from typing import Tuple, Union import tensorflow as tf @@ -80,8 +82,8 @@ class TFDPRContextEncoderOutput(ModelOutput): """ pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -108,8 +110,8 @@ class TFDPRQuestionEncoderOutput(ModelOutput): """ pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -141,8 +143,8 @@ class TFDPRReaderOutput(ModelOutput): start_logits: tf.Tensor = None end_logits: tf.Tensor = None relevance_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFDPREncoderLayer(tf.keras.layers.Layer): @@ -167,9 +169,9 @@ class TFDPREncoderLayer(tf.keras.layers.Layer): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = None, output_hidden_states: bool = None, return_dict: bool = None, @@ -227,8 +229,8 @@ class TFDPRSpanPredictorLayer(tf.keras.layers.Layer): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -283,9 +285,9 @@ class TFDPRSpanPredictor(TFPreTrainedModel): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -316,9 +318,9 @@ class TFDPREncoder(TFPreTrainedModel): def call( self, input_ids: tf.Tensor = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = False, @@ -552,9 +554,9 @@ class TFDPRContextEncoder(TFDPRPretrainedContextEncoder): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions=None, output_hidden_states=None, return_dict=None, @@ -639,9 +641,9 @@ class TFDPRQuestionEncoder(TFDPRPretrainedQuestionEncoder): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions=None, output_hidden_states=None, return_dict=None, @@ -725,8 +727,8 @@ class TFDPRReader(TFDPRPretrainedReader): def call( self, input_ids=None, - attention_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: bool = None, output_hidden_states: bool = None, return_dict=None, diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 82c3381724..7602d43cc0 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF Electra model.""" + +from __future__ import annotations + import math import warnings from dataclasses import dataclass @@ -312,9 +315,9 @@ class TFElectraLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -391,9 +394,9 @@ class TFElectraEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -704,14 +707,14 @@ class TFElectraMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -824,8 +827,8 @@ class TFElectraForPreTrainingOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None ELECTRA_START_DOCSTRING = r""" @@ -941,14 +944,14 @@ class TFElectraModel(TFElectraPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1034,12 +1037,12 @@ class TFElectraForPreTraining(TFElectraPreTrainedModel): @replace_return_docstrings(output_type=TFElectraForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1171,16 +1174,16 @@ class TFElectraForMaskedLM(TFElectraPreTrainedModel, TFMaskedLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1281,16 +1284,16 @@ class TFElectraForSequenceClassification(TFElectraPreTrainedModel, TFSequenceCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1372,16 +1375,16 @@ class TFElectraForMultipleChoice(TFElectraPreTrainedModel, TFMultipleChoiceLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1490,16 +1493,16 @@ class TFElectraForTokenClassification(TFElectraPreTrainedModel, TFTokenClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1573,17 +1576,17 @@ class TFElectraForQuestionAnswering(TFElectraPreTrainedModel, TFQuestionAnswerin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 5ec7f2932f..f5cd5e445a 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -14,6 +14,9 @@ # limitations under the License. """ Classes to support TF Encoder-Decoder architectures""" + +from __future__ import annotations + import inspect import re import warnings @@ -482,15 +485,15 @@ class TFEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLoss): @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index 135c16a14b..df4ea54f83 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -14,6 +14,9 @@ # limitations under the License. """ PyTorch ESM model.""" + +from __future__ import annotations + import os from typing import Optional, Tuple, Union @@ -312,11 +315,11 @@ class TFEsmSelfAttention(Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -801,13 +804,13 @@ class TFEsmMainLayer(Layer): def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -984,13 +987,13 @@ class TFEsmModel(TFEsmPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1113,14 +1116,14 @@ class TFEsmForMaskedLM(TFEsmPreTrainedModel, TFMaskedLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1261,12 +1264,12 @@ class TFEsmForSequenceClassification(TFEsmPreTrainedModel, TFSequenceClassificat ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1356,12 +1359,12 @@ class TFEsmForTokenClassification(TFEsmPreTrainedModel, TFTokenClassificationLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index b1dd523ded..7f93caebb0 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -16,6 +16,9 @@ TF 2.0 Flaubert model. """ + +from __future__ import annotations + import itertools import random import warnings @@ -255,15 +258,15 @@ class TFFlaubertModel(TFFlaubertPreTrainedModel): ) def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -486,15 +489,15 @@ class TFFlaubertMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -754,8 +757,8 @@ class TFFlaubertWithLMHeadModelOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @add_start_docstrings( @@ -803,15 +806,15 @@ class TFFlaubertWithLMHeadModel(TFFlaubertPreTrainedModel): ) def call( self, - input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -874,19 +877,19 @@ class TFFlaubertForSequenceClassification(TFFlaubertPreTrainedModel, TFSequenceC ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -960,20 +963,20 @@ class TFFlaubertForQuestionAnsweringSimple(TFFlaubertPreTrainedModel, TFQuestion ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1064,19 +1067,19 @@ class TFFlaubertForTokenClassification(TFFlaubertPreTrainedModel, TFTokenClassif ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1172,19 +1175,19 @@ class TFFlaubertForMultipleChoice(TFFlaubertPreTrainedModel, TFMultipleChoiceLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: if input_ids is not None: diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index 84254f2b28..fa077d612d 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF 2.0 Funnel model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -995,8 +998,8 @@ class TFFunnelForPreTrainingOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None FUNNEL_START_DOCSTRING = r""" @@ -1110,10 +1113,10 @@ class TFFunnelBaseModel(TFFunnelPreTrainedModel): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1158,10 +1161,10 @@ class TFFunnelModel(TFFunnelPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1206,10 +1209,10 @@ class TFFunnelForPreTraining(TFFunnelPreTrainedModel): @replace_return_docstrings(output_type=TFFunnelForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1285,14 +1288,14 @@ class TFFunnelForMaskedLM(TFFunnelPreTrainedModel, TFMaskedLanguageModelingLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFMaskedLMOutput]: r""" @@ -1357,14 +1360,14 @@ class TFFunnelForSequenceClassification(TFFunnelPreTrainedModel, TFSequenceClass ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFSequenceClassifierOutput]: r""" @@ -1441,14 +1444,14 @@ class TFFunnelForMultipleChoice(TFFunnelPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFMultipleChoiceModelOutput]: r""" @@ -1550,14 +1553,14 @@ class TFFunnelForTokenClassification(TFFunnelPreTrainedModel, TFTokenClassificat ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFTokenClassifierOutput]: r""" @@ -1626,15 +1629,15 @@ class TFFunnelForQuestionAnswering(TFFunnelPreTrainedModel, TFQuestionAnsweringL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFQuestionAnsweringModelOutput]: r""" diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index b7cb1b6df2..6b7476b71b 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -15,6 +15,8 @@ # limitations under the License. """ TF 2.0 OpenAI GPT-2 model.""" +from __future__ import annotations + from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -345,15 +347,15 @@ class TFGPT2MainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -583,9 +585,9 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput): logits: tf.Tensor = None mc_logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None GPT2_START_DOCSTRING = r""" @@ -716,15 +718,15 @@ class TFGPT2Model(TFGPT2PreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -844,20 +846,20 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -963,14 +965,14 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel): @replace_return_docstrings(output_type=TFGPT2DoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - mc_token_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + mc_token_ids: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1124,18 +1126,18 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutputWithPast, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index fbef4f0eff..09e4330eb1 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -14,6 +14,8 @@ # limitations under the License. """ TF 2.0 GPT-J model.""" +from __future__ import annotations + from typing import Optional, Tuple, Union import numpy as np @@ -171,8 +173,8 @@ class TFGPTJAttention(tf.keras.layers.Layer): query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, ) -> Tuple[tf.Tensor, tf.Tensor]: # compute causal mask from causal mask buffer query_length, key_length = shape_list(query)[-2], shape_list(key)[-2] @@ -207,9 +209,9 @@ class TFGPTJAttention(tf.keras.layers.Layer): self, hidden_states: tf.Tensor, layer_past: Optional[Tuple[tf.Tensor, tf.Tensor]] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, use_cache: bool = False, output_attentions: bool = False, ): @@ -301,10 +303,10 @@ class TFGPTJBlock(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - layer_past: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + layer_past: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, use_cache: bool = False, output_attentions: bool = False, ): @@ -659,13 +661,13 @@ class TFGPTJModel(TFGPTJPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -762,14 +764,14 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -865,14 +867,14 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -987,15 +989,15 @@ class TFGPTJForQuestionAnswering(TFGPTJPreTrainedModel, TFQuestionAnsweringLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 4891931c20..7188847209 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -15,6 +15,8 @@ """ TF 2.0 GroupViT model.""" +from __future__ import annotations + import collections.abc import math from dataclasses import dataclass @@ -247,7 +249,7 @@ class TFGroupViTModelOutput(ModelOutput): The output of the [`TFGroupViTVisionModel`]. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits_per_image: tf.Tensor = None logits_per_text: tf.Tensor = None segmentation_logits: tf.Tensor = None @@ -647,7 +649,7 @@ class TFGroupViTStage(tf.keras.layers.Layer): else: return x, None - def concat_x(self, x: tf.Tensor, group_token: Optional[tf.Tensor] = None) -> tf.Tensor: + def concat_x(self, x: tf.Tensor, group_token: tf.Tensor | None = None) -> tf.Tensor: if group_token is None: return x return tf.concat([x, group_token], axis=1) @@ -655,7 +657,7 @@ class TFGroupViTStage(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - prev_group_token: Optional[tf.Tensor] = None, + prev_group_token: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -1138,9 +1140,9 @@ class TFGroupViTTextMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1183,7 +1185,7 @@ class TFGroupViTVisionMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1262,9 +1264,9 @@ class TFGroupViTMainLayer(tf.keras.layers.Layer): @unpack_inputs def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1298,7 +1300,7 @@ class TFGroupViTMainLayer(tf.keras.layers.Layer): @unpack_inputs def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1325,10 +1327,10 @@ class TFGroupViTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1635,9 +1637,9 @@ class TFGroupViTTextModel(TFGroupViTPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTTextConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1731,7 +1733,7 @@ class TFGroupViTVisionModel(TFGroupViTPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTVisionConfig) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1831,9 +1833,9 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel): @add_start_docstrings_to_model_forward(GROUPVIT_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1872,7 +1874,7 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel): @add_start_docstrings_to_model_forward(GROUPVIT_VISION_INPUTS_DOCSTRING) def get_image_features( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1916,10 +1918,10 @@ class TFGroupViTModel(TFGroupViTPreTrainedModel): @replace_return_docstrings(output_type=TFGroupViTModelOutput, config_class=GroupViTConfig) def call( self, - input_ids: Optional[TFModelInputType] = None, - pixel_values: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + pixel_values: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, return_loss: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index 24cbde9af7..fd1f17edfb 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ TensorFlow Hubert model.""" + +from __future__ import annotations + import warnings from typing import Any, Dict, Optional, Tuple, Union @@ -642,12 +645,12 @@ class TFHubertAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -812,7 +815,7 @@ class TFHubertEncoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -856,7 +859,7 @@ class TFHubertEncoderLayerStableLayerNorm(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -890,7 +893,7 @@ class TFHubertEncoder(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -958,7 +961,7 @@ class TFHubertEncoderStableLayerNorm(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -1048,7 +1051,7 @@ class TFHubertMainLayer(tf.keras.layers.Layer): return input_lengths - def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optional[tf.Tensor] = None): + def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: tf.Tensor | None = None): """ Masks extracted features along time axis and/or along feature axis according to [SpecAugment](https://arxiv.org/abs/1904.08779). @@ -1096,13 +1099,13 @@ class TFHubertMainLayer(tf.keras.layers.Layer): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - output_attentions: Optional[tf.Tensor] = None, - output_hidden_states: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + output_attentions: tf.Tensor | None = None, + output_hidden_states: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: bool = False, **kwargs: Any, @@ -1299,11 +1302,11 @@ class TFHubertModel(TFHubertPreTrainedModel): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1401,13 +1404,13 @@ class TFHubertForCTC(TFHubertPreTrainedModel): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 2755e05537..67128e0c13 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF 2.0 LayoutLM model.""" + +from __future__ import annotations + import math import warnings from typing import Dict, Optional, Tuple, Union @@ -423,9 +426,9 @@ class TFLayoutLMLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -502,9 +505,9 @@ class TFLayoutLMEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -694,15 +697,15 @@ class TFLayoutLMMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -919,15 +922,15 @@ class TFLayoutLMModel(TFLayoutLMPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1039,17 +1042,17 @@ class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingL @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1162,17 +1165,17 @@ class TFLayoutLMForSequenceClassification(TFLayoutLMPreTrainedModel, TFSequenceC @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1292,17 +1295,17 @@ class TFLayoutLMForTokenClassification(TFLayoutLMPreTrainedModel, TFTokenClassif @replace_return_docstrings(output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1418,18 +1421,18 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer @replace_return_docstrings(output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - bbox: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + bbox: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 491ef186e5..67377c5baf 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -14,6 +14,9 @@ # limitations under the License. """TF 2.0 LayoutLMv3 model.""" + +from __future__ import annotations + import collections import math from typing import Dict, List, Optional, Tuple, Union @@ -222,11 +225,11 @@ class TFLayoutLMv3TextEmbeddings(tf.keras.layers.Layer): def call( self, - input_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, bbox: tf.Tensor = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, training: bool = False, ) -> tf.Tensor: if position_ids is None: @@ -319,11 +322,11 @@ class TFLayoutLMv3SelfAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: key_layer = self.transpose_for_scores(self.key(hidden_states)) @@ -398,11 +401,11 @@ class TFLayoutLMv3Attention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: self_outputs = self.self_attention( @@ -469,11 +472,11 @@ class TFLayoutLMv3Layer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor], - head_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, + head_mask: tf.Tensor | None, output_attentions: bool, - rel_pos: Optional[tf.Tensor] = None, - rel_2d_pos: Optional[tf.Tensor] = None, + rel_pos: tf.Tensor | None = None, + rel_2d_pos: tf.Tensor | None = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]: self_attention_outputs = self.attention( @@ -593,13 +596,13 @@ class TFLayoutLMv3Encoder(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, - position_ids: Optional[tf.Tensor] = None, + position_ids: tf.Tensor | None = None, training: bool = False, ) -> Union[ TFBaseModelOutput, @@ -778,7 +781,7 @@ class TFLayoutLMv3MainLayer(tf.keras.layers.Layer): return extended_attention_mask - def get_head_mask(self, head_mask: Optional[tf.Tensor]) -> Union[tf.Tensor, List[Optional[tf.Tensor]]]: + def get_head_mask(self, head_mask: tf.Tensor | None) -> Union[tf.Tensor, List[tf.Tensor | None]]: if head_mask is None: return [None] * self.config.num_hidden_layers @@ -806,14 +809,14 @@ class TFLayoutLMv3MainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1145,14 +1148,14 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1272,18 +1275,18 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - bbox: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[ TFSequenceClassifierOutput, @@ -1392,18 +1395,18 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla @replace_return_docstrings(output_type=TFTokenClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - bbox: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + bbox: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[ TFTokenClassifierOutput, @@ -1514,18 +1517,18 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn @replace_return_docstrings(output_type=TFQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - start_positions: Optional[tf.Tensor] = None, - end_positions: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + start_positions: tf.Tensor | None = None, + end_positions: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, - bbox: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, + bbox: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: bool = False, ) -> Union[ diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 324482b4d2..4e815da33d 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -15,6 +15,8 @@ """ TF 2.0 LED model.""" +from __future__ import annotations + import random from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -1030,12 +1032,12 @@ class TFLEDDecoderAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training=False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -1238,12 +1240,12 @@ class TFLEDDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - encoder_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + encoder_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1389,9 +1391,9 @@ class TFLEDEncoderBaseModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -1452,14 +1454,14 @@ class TFLEDSeq2SeqModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_global_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None + encoder_global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -1517,16 +1519,16 @@ class TFLEDSeq2SeqLMOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - decoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - decoder_attentions: Optional[Tuple[tf.Tensor]] = None - cross_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_last_hidden_state: Optional[tf.Tensor] = None - encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None - encoder_attentions: Optional[Tuple[tf.Tensor]] = None - encoder_global_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + decoder_hidden_states: Tuple[tf.Tensor] | None = None + decoder_attentions: Tuple[tf.Tensor] | None = None + cross_attentions: Tuple[tf.Tensor] | None = None + encoder_last_hidden_state: tf.Tensor | None = None + encoder_hidden_states: Tuple[tf.Tensor] | None = None + encoder_attentions: Tuple[tf.Tensor] | None = None + encoder_global_attentions: Tuple[tf.Tensor] | None = None LED_START_DOCSTRING = r""" @@ -2383,22 +2385,22 @@ class TFLEDForConditionalGeneration(TFLEDPreTrainedModel): @replace_return_docstrings(output_type=TFLEDSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFLEDEncoderBaseModelOutput] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ): """ diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index c47df16965..b5adb2c803 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -14,6 +14,9 @@ # limitations under the License. """Tensorflow Longformer model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Optional, Tuple, Union @@ -101,9 +104,9 @@ class TFLongformerBaseModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -149,9 +152,9 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -193,11 +196,11 @@ class TFLongformerMaskedLMOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -241,12 +244,12 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -288,11 +291,11 @@ class TFLongformerSequenceClassifierOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -336,11 +339,11 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -382,11 +385,11 @@ class TFLongformerTokenClassifierOutput(ModelOutput): in the sequence. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - global_attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + global_attentions: Tuple[tf.Tensor] | None = None def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_sep_token=True): @@ -2038,13 +2041,13 @@ class TFLongformerModel(TFLongformerPreTrainedModel): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -2113,17 +2116,17 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModel ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -2206,18 +2209,18 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAn ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -2369,17 +2372,17 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSeque ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerSequenceClassifierOutput, Tuple[tf.Tensor]]: if input_ids is not None and not isinstance(input_ids, tf.Tensor): @@ -2491,17 +2494,17 @@ class TFLongformerForMultipleChoice(TFLongformerPreTrainedModel, TFMultipleChoic ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFLongformerMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -2619,13 +2622,13 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - global_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + global_attention_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index f02d0ff644..59bc185914 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -16,6 +16,9 @@ # limitations under the License. """ TF 2.0 LXMERT model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -90,14 +93,14 @@ class TFLxmertModelOutput(ModelOutput): the self-attention heads. """ - language_output: Optional[tf.Tensor] = None - vision_output: Optional[tf.Tensor] = None - pooled_output: Optional[tf.Tensor] = None - language_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - language_attentions: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - cross_encoder_attentions: Optional[Tuple[tf.Tensor]] = None + language_output: tf.Tensor | None = None + vision_output: tf.Tensor | None = None + pooled_output: tf.Tensor | None = None + language_hidden_states: Tuple[tf.Tensor] | None = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + language_attentions: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + cross_encoder_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -137,15 +140,15 @@ class TFLxmertForPreTrainingOutput(ModelOutput): """ - loss: Optional[tf.Tensor] = None - prediction_logits: Optional[tf.Tensor] = None - cross_relationship_score: Optional[tf.Tensor] = None - question_answering_score: Optional[tf.Tensor] = None - language_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - language_attentions: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - cross_encoder_attentions: Optional[Tuple[tf.Tensor]] = None + loss: tf.Tensor | None = None + prediction_logits: tf.Tensor | None = None + cross_relationship_score: tf.Tensor | None = None + question_answering_score: tf.Tensor | None = None + language_hidden_states: Tuple[tf.Tensor] | None = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + language_attentions: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + cross_encoder_attentions: Tuple[tf.Tensor] | None = None class TFLxmertVisualFeatureEncoder(tf.keras.layers.Layer): @@ -945,13 +948,13 @@ class TFLxmertModel(TFLxmertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - visual_feats: Optional[tf.Tensor] = None, - visual_pos: Optional[tf.Tensor] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - visual_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + visual_feats: tf.Tensor | None = None, + visual_pos: tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + visual_attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 1751158832..208e9b8335 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -15,6 +15,8 @@ """ TF 2.0 Marian model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -165,7 +167,7 @@ class TFMarianSinusoidalPositionalEmbedding(tf.keras.layers.Layer): return table def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -212,12 +214,12 @@ class TFMarianAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -347,8 +349,8 @@ class TFMarianEncoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]], - layer_head_mask: Optional[tf.Tensor], + attention_mask: np.ndarray | tf.Tensor | None, + layer_head_mask: tf.Tensor | None, training: Optional[bool] = False, ) -> tf.Tensor: """ @@ -417,11 +419,11 @@ class TFMarianDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: @@ -708,10 +710,10 @@ class TFMarianEncoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -870,15 +872,15 @@ class TFMarianDecoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1097,18 +1099,18 @@ class TFMarianMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Tuple[Tuple[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1202,18 +1204,18 @@ class TFMarianModel(TFMarianPreTrainedModel): ) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, - encoder_outputs: Optional[tf.Tensor] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, + encoder_outputs: tf.Tensor | None = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1330,23 +1332,23 @@ class TFMarianMTModel(TFMarianPreTrainedModel, TFCausalLanguageModelingLoss): @add_end_docstrings(MARIAN_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ): r""" diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 13453bd22d..293c564141 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -15,6 +15,8 @@ """ TF 2.0 MBart model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -131,7 +133,7 @@ class TFMBartLearnedPositionalEmbedding(tf.keras.layers.Embedding): self, input_shape: Optional[tf.TensorShape] = None, past_key_values_length: int = 0, - position_ids: Optional[tf.Tensor] = None, + position_ids: tf.Tensor | None = None, ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -181,12 +183,12 @@ class TFMBartAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -384,12 +386,12 @@ class TFMBartDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -700,10 +702,10 @@ class TFMBartEncoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -868,14 +870,14 @@ class TFMBartDecoder(tf.keras.layers.Layer): def call( self, input_ids: TFModelInputType = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1100,17 +1102,17 @@ class TFMBartMainLayer(tf.keras.layers.Layer): def call( self, input_ids: TFModelInputType = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1208,17 +1210,17 @@ class TFMBartModel(TFMBartPreTrainedModel): def call( self, input_ids: TFModelInputType = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1336,22 +1338,22 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo def call( self, input_ids: TFModelInputType = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, past_key_values: Tuple[Tuple[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: """ diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index c47cde847d..eddb339074 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 MobileBERT model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Dict, Optional, Tuple, Union @@ -846,11 +849,11 @@ class TFMobileBertForPreTrainingOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None prediction_logits: tf.Tensor = None seq_relationship_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None MOBILEBERT_START_DOCSTRING = r""" @@ -969,12 +972,12 @@ class TFMobileBertModel(TFMobileBertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1033,17 +1036,17 @@ class TFMobileBertForPreTraining(TFMobileBertPreTrainedModel, TFMobileBertPreTra @replace_return_docstrings(output_type=TFMobileBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, + next_sentence_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMobileBertForPreTrainingOutput]: r""" @@ -1141,16 +1144,16 @@ class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModel ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMaskedLMOutput]: r""" @@ -1224,16 +1227,16 @@ class TFMobileBertForNextSentencePrediction(TFMobileBertPreTrainedModel, TFNextS @replace_return_docstrings(output_type=TFNextSentencePredictorOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - next_sentence_label: Optional[Union[np.ndarray, tf.Tensor]] = None, + next_sentence_label: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFNextSentencePredictorOutput]: r""" @@ -1335,16 +1338,16 @@ class TFMobileBertForSequenceClassification(TFMobileBertPreTrainedModel, TFSeque ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" @@ -1430,17 +1433,17 @@ class TFMobileBertForQuestionAnswering(TFMobileBertPreTrainedModel, TFQuestionAn ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFQuestionAnsweringModelOutput]: r""" @@ -1546,16 +1549,16 @@ class TFMobileBertForMultipleChoice(TFMobileBertPreTrainedModel, TFMultipleChoic ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFMultipleChoiceModelOutput]: r""" @@ -1674,16 +1677,16 @@ class TFMobileBertForTokenClassification(TFMobileBertPreTrainedModel, TFTokenCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFTokenClassifierOutput]: r""" diff --git a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py index 1b06f36536..879c642800 100644 --- a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py +++ b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py @@ -16,6 +16,8 @@ # Original license: https://github.com/apple/ml-cvnets/blob/main/LICENSE """ TensorFlow 2.0 MobileViT model.""" +from __future__ import annotations + from typing import Dict, Optional, Tuple, Union import tensorflow as tf @@ -663,7 +665,7 @@ class TFMobileViTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -846,7 +848,7 @@ class TFMobileViTModel(TFMobileViTPreTrainedModel): ) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -893,9 +895,9 @@ class TFMobileViTForImageClassification(TFMobileViTPreTrainedModel, TFSequenceCl ) def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, ) -> Union[tuple, TFImageClassifierOutputWithNoAttention]: @@ -1083,8 +1085,8 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel): @replace_return_docstrings(output_type=TFSemanticSegmenterOutputWithNoAttention, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 08db310173..2f4178d6cf 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -16,6 +16,8 @@ """ TF 2.0 MPNet model.""" +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -682,11 +684,11 @@ class TFMPNetModel(TFMPNetPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, attention_mask: Optional[Union[np.array, tf.Tensor]] = None, position_ids: Optional[Union[np.array, tf.Tensor]] = None, head_mask: Optional[Union[np.array, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -795,15 +797,15 @@ class TFMPNetForMaskedLM(TFMPNetPreTrainedModel, TFMaskedLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -898,15 +900,15 @@ class TFMPNetForSequenceClassification(TFMPNetPreTrainedModel, TFSequenceClassif ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, attention_mask: Optional[Union[np.array, tf.Tensor]] = None, position_ids: Optional[Union[np.array, tf.Tensor]] = None, head_mask: Optional[Union[np.array, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -987,15 +989,15 @@ class TFMPNetForMultipleChoice(TFMPNetPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1096,15 +1098,15 @@ class TFMPNetForTokenClassification(TFMPNetPreTrainedModel, TFTokenClassificatio ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1176,16 +1178,16 @@ class TFMPNetForQuestionAnswering(TFMPNetPreTrainedModel, TFQuestionAnsweringLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, attention_mask: Optional[Union[np.array, tf.Tensor]] = None, position_ids: Optional[Union[np.array, tf.Tensor]] = None, head_mask: Optional[Union[np.array, tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[tf.Tensor] = None, - end_positions: Optional[tf.Tensor] = None, + start_positions: tf.Tensor | None = None, + end_positions: tf.Tensor | None = None, training: bool = False, **kwargs, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 7c04520c9c..3f89672419 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -15,6 +15,8 @@ # limitations under the License. """ TF 2.0 OpenAI GPT model.""" +from __future__ import annotations + from dataclasses import dataclass from typing import Optional, Tuple, Union @@ -237,12 +239,12 @@ class TFOpenAIGPTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -394,8 +396,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput): logits: tf.Tensor = None mc_logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None OPENAI_GPT_START_DOCSTRING = r""" @@ -514,12 +516,12 @@ class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -576,16 +578,16 @@ class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel, TFCausalLanguageModelin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFCausalLMOutput]: r""" @@ -661,13 +663,13 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel): @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - mc_token_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + mc_token_ids: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -809,16 +811,16 @@ class TFOpenAIGPTForSequenceClassification(TFOpenAIGPTPreTrainedModel, TFSequenc ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 1855fcb1bc..227e56fdef 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -15,6 +15,8 @@ """ TF 2.0 OPT model.""" +from __future__ import annotations + from typing import Optional, Tuple, Union import numpy as np @@ -152,12 +154,12 @@ class TFOPTAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -291,8 +293,8 @@ class TFOPTDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - layer_head_mask: Optional[tf.Tensor] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, past_key_value: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, training: Optional[bool] = False, output_attentions: Optional[bool] = False, @@ -552,10 +554,10 @@ class TFOPTDecoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -732,11 +734,11 @@ class TFOPTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -804,11 +806,11 @@ class TFOPTModel(TFOPTPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -901,13 +903,13 @@ class TFOPTForCausalLM(TFOPTPreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, + input_ids: TFModelInputType | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 1ccccc2dc5..7de1542ebe 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -15,6 +15,8 @@ """ TF 2.0 Pegasus model.""" +from __future__ import annotations + import random from typing import Optional, Tuple, Union @@ -167,7 +169,7 @@ class TFPegasusSinusoidalPositionalEmbedding(tf.keras.layers.Layer): return table def call( - self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: Optional[tf.Tensor] = None + self, input_shape: tf.TensorShape, past_key_values_length: int = 0, position_ids: tf.Tensor | None = None ): """Input is expected to be of size [bsz x seqlen].""" if position_ids is None: @@ -214,12 +216,12 @@ class TFPegasusAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -419,12 +421,12 @@ class TFPegasusDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -712,10 +714,10 @@ class TFPegasusEncoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -877,14 +879,14 @@ class TFPegasusDecoder(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, past_key_values: Tuple[Tuple[tf.Tensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1106,18 +1108,18 @@ class TFPegasusMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - decoder_input_ids: Optional[tf.Tensor] = None, - decoder_attention_mask: Optional[tf.Tensor] = None, - decoder_position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - decoder_head_mask: Optional[tf.Tensor] = None, - cross_attn_head_mask: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + decoder_input_ids: tf.Tensor | None = None, + decoder_attention_mask: tf.Tensor | None = None, + decoder_position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + decoder_head_mask: tf.Tensor | None = None, + cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Tuple[Tuple[tf.Tensor]] = None, - inputs_embeds: Optional[tf.Tensor] = None, - decoder_inputs_embeds: Optional[tf.Tensor] = None, + inputs_embeds: tf.Tensor | None = None, + decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1211,18 +1213,18 @@ class TFPegasusModel(TFPegasusPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1339,23 +1341,23 @@ class TFPegasusForConditionalGeneration(TFPegasusPreTrainedModel, TFCausalLangua @add_end_docstrings(PEGASUS_GENERATION_EXAMPLE) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSeq2SeqLMOutput, Tuple[tf.Tensor]]: """ diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 0ea2e55448..d91fa71df8 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -15,6 +15,9 @@ """TFRAG model implementation.""" + +from __future__ import annotations + import copy from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -111,22 +114,22 @@ class TFRetrievAugLMMarginOutput(ModelOutput): average in the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - doc_scores: Optional[tf.Tensor] = None - retrieved_doc_embeds: Optional[tf.Tensor] = None - retrieved_doc_ids: Optional[tf.Tensor] = None - context_input_ids: Optional[tf.Tensor] = None - context_attention_mask: Optional[tf.Tensor] = None - question_encoder_last_hidden_state: Optional[tf.Tensor] = None - question_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - question_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_enc_last_hidden_state: Optional[tf.Tensor] = None - generator_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_dec_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_dec_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + doc_scores: tf.Tensor | None = None + retrieved_doc_embeds: tf.Tensor | None = None + retrieved_doc_ids: tf.Tensor | None = None + context_input_ids: tf.Tensor | None = None + context_attention_mask: tf.Tensor | None = None + question_encoder_last_hidden_state: tf.Tensor | None = None + question_enc_hidden_states: Tuple[tf.Tensor] | None = None + question_enc_attentions: Tuple[tf.Tensor] | None = None + generator_enc_last_hidden_state: tf.Tensor | None = None + generator_enc_hidden_states: Tuple[tf.Tensor] | None = None + generator_enc_attentions: Tuple[tf.Tensor] | None = None + generator_dec_hidden_states: Tuple[tf.Tensor] | None = None + generator_dec_attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -196,20 +199,20 @@ class TFRetrievAugLMOutput(ModelOutput): """ logits: tf.Tensor = None - past_key_values: Optional[List[tf.Tensor]] = None - doc_scores: Optional[tf.Tensor] = None - retrieved_doc_embeds: Optional[tf.Tensor] = None - retrieved_doc_ids: Optional[tf.Tensor] = None - context_input_ids: Optional[tf.Tensor] = None - context_attention_mask: Optional[tf.Tensor] = None - question_encoder_last_hidden_state: Optional[tf.Tensor] = None - question_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - question_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_enc_last_hidden_state: Optional[tf.Tensor] = None - generator_enc_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_enc_attentions: Optional[Tuple[tf.Tensor]] = None - generator_dec_hidden_states: Optional[Tuple[tf.Tensor]] = None - generator_dec_attentions: Optional[Tuple[tf.Tensor]] = None + past_key_values: List[tf.Tensor] | None = None + doc_scores: tf.Tensor | None = None + retrieved_doc_embeds: tf.Tensor | None = None + retrieved_doc_ids: tf.Tensor | None = None + context_input_ids: tf.Tensor | None = None + context_attention_mask: tf.Tensor | None = None + question_encoder_last_hidden_state: tf.Tensor | None = None + question_enc_hidden_states: Tuple[tf.Tensor] | None = None + question_enc_attentions: Tuple[tf.Tensor] | None = None + generator_enc_last_hidden_state: tf.Tensor | None = None + generator_enc_hidden_states: Tuple[tf.Tensor] | None = None + generator_enc_attentions: Tuple[tf.Tensor] | None = None + generator_dec_hidden_states: Tuple[tf.Tensor] | None = None + generator_dec_attentions: Tuple[tf.Tensor] | None = None class TFRagPreTrainedModel(TFPreTrainedModel): @@ -545,15 +548,15 @@ class TFRagModel(TFRagPreTrainedModel): @replace_return_docstrings(output_type=TFRetrievAugLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + doc_scores: np.ndarray | tf.Tensor | None = None, + context_input_ids: np.ndarray | tf.Tensor | None = None, + context_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -841,22 +844,22 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + doc_scores: np.ndarray | tf.Tensor | None = None, + context_input_ids: np.ndarray | tf.Tensor | None = None, + context_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, output_retrieved: Optional[bool] = None, n_docs: Optional[int] = None, do_marginalize: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, reduce_loss: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -993,8 +996,8 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss def generate( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: tf.Tensor | None = None, context_input_ids=None, context_attention_mask=None, doc_scores=None, @@ -1347,22 +1350,22 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL @replace_return_docstrings(output_type=TFRetrievAugLMMarginOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - doc_scores: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - context_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + doc_scores: np.ndarray | tf.Tensor | None = None, + context_input_ids: np.ndarray | tf.Tensor | None = None, + context_attention_mask: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, output_retrieved: Optional[bool] = None, n_docs: Optional[int] = None, exclude_bos_score: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, reduce_loss: Optional[bool] = None, return_dict: Optional[bool] = None, training: bool = False, @@ -1579,8 +1582,8 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL def generate( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[tf.Tensor] = None, + input_ids: TFModelInputType | None = None, + attention_mask: tf.Tensor | None = None, context_input_ids=None, context_attention_mask=None, doc_scores=None, diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index c4dc8c5a14..097bd977a4 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -15,6 +15,8 @@ """ TF 2.0 RemBERT model.""" +from __future__ import annotations + import math from typing import Dict, Optional, Tuple, Union @@ -382,9 +384,9 @@ class TFRemBertLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -645,14 +647,14 @@ class TFRemBertMainLayer(tf.keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -946,14 +948,14 @@ class TFRemBertModel(TFRemBertPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1048,16 +1050,16 @@ class TFRemBertForMaskedLM(TFRemBertPreTrainedModel, TFMaskedLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1137,20 +1139,20 @@ class TFRemBertForCausalLM(TFRemBertPreTrainedModel, TFCausalLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1259,16 +1261,16 @@ class TFRemBertForSequenceClassification(TFRemBertPreTrainedModel, TFSequenceCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1348,16 +1350,16 @@ class TFRemBertForMultipleChoice(TFRemBertPreTrainedModel, TFMultipleChoiceLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1466,16 +1468,16 @@ class TFRemBertForTokenClassification(TFRemBertPreTrainedModel, TFTokenClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1544,17 +1546,17 @@ class TFRemBertForQuestionAnswering(TFRemBertPreTrainedModel, TFQuestionAnswerin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 7aa2c9e07a..585c4d31ad 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 RoBERTa model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -431,9 +434,9 @@ class TFRobertaLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -510,9 +513,9 @@ class TFRobertaEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -609,14 +612,14 @@ class TFRobertaMainLayer(tf.keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -923,14 +926,14 @@ class TFRobertaModel(TFRobertaPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1081,16 +1084,16 @@ class TFRobertaForMaskedLM(TFRobertaPreTrainedModel, TFMaskedLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1178,20 +1181,20 @@ class TFRobertaForCausalLM(TFRobertaPreTrainedModel, TFCausalLanguageModelingLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1329,16 +1332,16 @@ class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel, TFSequenceCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1423,16 +1426,16 @@ class TFRobertaForMultipleChoice(TFRobertaPreTrainedModel, TFMultipleChoiceLoss) ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1539,16 +1542,16 @@ class TFRobertaForTokenClassification(TFRobertaPreTrainedModel, TFTokenClassific ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1624,17 +1627,17 @@ class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel, TFQuestionAnswerin ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py index fedfea56a7..80a834ad58 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 RoBERTa-PreLayerNorm model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -435,9 +438,9 @@ class TFRobertaPreLayerNormLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -514,9 +517,9 @@ class TFRobertaPreLayerNormEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -610,14 +613,14 @@ class TFRobertaPreLayerNormMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -925,14 +928,14 @@ class TFRobertaPreLayerNormModel(TFRobertaPreLayerNormPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1090,16 +1093,16 @@ class TFRobertaPreLayerNormForMaskedLM(TFRobertaPreLayerNormPreTrainedModel, TFM # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForMaskedLM.call with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1194,20 +1197,20 @@ class TFRobertaPreLayerNormForCausalLM(TFRobertaPreLayerNormPreTrainedModel, TFC ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1349,16 +1352,16 @@ class TFRobertaPreLayerNormForSequenceClassification( # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForSequenceClassification.call with roberta->roberta_prelayernorm def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1446,16 +1449,16 @@ class TFRobertaPreLayerNormForMultipleChoice(TFRobertaPreLayerNormPreTrainedMode ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1563,16 +1566,16 @@ class TFRobertaPreLayerNormForTokenClassification(TFRobertaPreLayerNormPreTraine # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForTokenClassification.call with roberta->roberta_prelayernorm def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1649,17 +1652,17 @@ class TFRobertaPreLayerNormForQuestionAnswering(TFRobertaPreLayerNormPreTrainedM # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForQuestionAnswering.call with roberta->roberta_prelayernorm def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 2d1387d2d8..50b5757146 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -15,6 +15,8 @@ """ TF 2.0 RoFormer model.""" +from __future__ import annotations + import math from typing import Dict, Optional, Tuple, Union @@ -604,11 +606,11 @@ class TFRoFormerMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -809,11 +811,11 @@ class TFRoFormerModel(TFRoFormerPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -866,15 +868,15 @@ class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -940,15 +942,15 @@ class TFRoFormerForCausalLM(TFRoFormerPreTrainedModel, TFCausalLanguageModelingL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutput, Tuple[tf.Tensor]]: r""" @@ -1049,15 +1051,15 @@ class TFRoFormerForSequenceClassification(TFRoFormerPreTrainedModel, TFSequenceC ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1138,15 +1140,15 @@ class TFRoFormerForMultipleChoice(TFRoFormerPreTrainedModel, TFMultipleChoiceLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1250,15 +1252,15 @@ class TFRoFormerForTokenClassification(TFRoFormerPreTrainedModel, TFTokenClassif ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1326,16 +1328,16 @@ class TFRoFormerForQuestionAnswering(TFRoFormerPreTrainedModel, TFQuestionAnswer ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py index ddd8e526a7..8b00f13b44 100644 --- a/src/transformers/models/sam/modeling_tf_sam.py +++ b/src/transformers/models/sam/modeling_tf_sam.py @@ -17,6 +17,9 @@ TensorFlow SAM model. This file was mostly generated by auto-translation from th discrepancy, the original file should be regarded as the 'reference' version. """ + +from __future__ import annotations + import collections from dataclasses import dataclass from typing import Dict, List, Optional, Tuple, Union @@ -26,7 +29,7 @@ import tensorflow as tf from ...activations_tf import ACT2FN from ...modeling_tf_outputs import TFBaseModelOutput -from ...modeling_tf_utils import TFPreTrainedModel, shape_list, unpack_inputs +from ...modeling_tf_utils import TFModelInputType, TFPreTrainedModel, shape_list, unpack_inputs from ...tf_utils import flatten, functional_layernorm from ...utils import ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, logging from .configuration_sam import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig @@ -69,10 +72,10 @@ class TFSamVisionEncoderOutput(ModelOutput): heads. """ - image_embeds: Optional[tf.Tensor] = None + image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -106,9 +109,9 @@ class TFSamImageSegmentationOutput(ModelOutput): iou_scores: tf.Tensor = None pred_masks: tf.Tensor = None - vision_hidden_states: Optional[Tuple[tf.Tensor]] = None - vision_attentions: Optional[Tuple[tf.Tensor]] = None - mask_decoder_attentions: Optional[Tuple[tf.Tensor]] = None + vision_hidden_states: Tuple[tf.Tensor] | None = None + vision_attentions: Tuple[tf.Tensor] | None = None + mask_decoder_attentions: Tuple[tf.Tensor] | None = None class TFSamPatchEmbeddings(tf.keras.layers.Layer): @@ -734,9 +737,9 @@ class TFSamPromptEncoder(tf.keras.layers.Layer): self, batch_size: Optional[int], input_points: Optional[Tuple[tf.Tensor, tf.Tensor]], - input_labels: Optional[tf.Tensor], - input_boxes: Optional[tf.Tensor], - input_masks: Optional[tf.Tensor], + input_labels: tf.Tensor | None, + input_boxes: tf.Tensor | None, + input_masks: tf.Tensor | None, ) -> Tuple[tf.Tensor, tf.Tensor]: """ Embeds different types of prompts, returning both sparse and dense embeddings. @@ -1084,7 +1087,7 @@ class TFSamVisionEncoder(tf.keras.layers.Layer): def call( self, - pixel_values: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1326,10 +1329,10 @@ class TFSamModel(TFSamPreTrainedModel): def get_prompt_embeddings( self, - input_points: Optional[tf.Tensor] = None, - input_labels: Optional[tf.Tensor] = None, - input_boxes: Optional[tf.Tensor] = None, - input_masks: Optional[tf.Tensor] = None, + input_points: tf.Tensor | None = None, + input_labels: tf.Tensor | None = None, + input_boxes: tf.Tensor | None = None, + input_masks: tf.Tensor | None = None, ): r""" Returns the prompt embeddings by passing the input points, labels, boxes and masks through the prompt encoder. @@ -1360,12 +1363,12 @@ class TFSamModel(TFSamPreTrainedModel): @add_start_docstrings_to_model_forward(SAM_INPUTS_DOCSTRING) def call( self, - pixel_values: Optional[tf.Tensor] = None, - input_points: Optional[tf.Tensor] = None, - input_labels: Optional[tf.Tensor] = None, - input_boxes: Optional[tf.Tensor] = None, - input_masks: Optional[tf.Tensor] = None, - image_embeddings: Optional[tf.Tensor] = None, + pixel_values: TFModelInputType | None = None, + input_points: tf.Tensor | None = None, + input_labels: tf.Tensor | None = None, + input_boxes: tf.Tensor | None = None, + input_masks: tf.Tensor | None = None, + image_embeddings: tf.Tensor | None = None, multimask_output: bool = True, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py index c877e86acf..47b7ce8e8c 100644 --- a/src/transformers/models/segformer/modeling_tf_segformer.py +++ b/src/transformers/models/segformer/modeling_tf_segformer.py @@ -14,6 +14,9 @@ # limitations under the License. """ TensorFlow SegFormer model.""" + +from __future__ import annotations + import math from typing import Dict, Optional, Tuple, Union @@ -664,8 +667,8 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceCl ) def call( self, - pixel_values: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -816,7 +819,7 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel): def call( self, pixel_values: tf.Tensor, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index e5c38afa83..3651506894 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -15,6 +15,8 @@ """ TensorFlow Speech2Text model.""" +from __future__ import annotations + import random from typing import Dict, Optional, Tuple, Union @@ -273,12 +275,12 @@ class TFSpeech2TextAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -476,12 +478,12 @@ class TFSpeech2TextDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1253,16 +1255,16 @@ class TFSpeech2TextModel(TFSpeech2TextPreTrainedModel): ) def call( self, - input_features: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1343,17 +1345,17 @@ class TFSpeech2TextForConditionalGeneration(TFSpeech2TextPreTrainedModel, TFCaus @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_features: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index 61352843c2..f75bf230c0 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -15,6 +15,8 @@ """ TF 2.0 Swin Transformer model.""" +from __future__ import annotations + import collections.abc import math import warnings @@ -95,9 +97,9 @@ class TFSwinEncoderOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -130,10 +132,10 @@ class TFSwinModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - pooler_output: Optional[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + pooler_output: tf.Tensor | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None @dataclass @@ -165,11 +167,11 @@ class TFSwinMaskedImageModelingOutput(ModelOutput): include the spatial dimensions. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None reconstruction: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None @property def logits(self): @@ -210,11 +212,11 @@ class TFSwinImageClassifierOutput(ModelOutput): include the spatial dimensions. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None - reshaped_hidden_states: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None + reshaped_hidden_states: Tuple[tf.Tensor] | None = None def window_partition(input_feature: tf.Tensor, window_size: int) -> tf.Tensor: @@ -529,8 +531,8 @@ class TFSwinSelfAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor, ...]: @@ -619,8 +621,8 @@ class TFSwinAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> tf.Tensor: @@ -683,7 +685,7 @@ class TFSwinLayer(tf.keras.layers.Layer): self.intermediate = TFSwinIntermediate(config, dim, name="intermediate") self.swin_output = TFSwinOutput(config, dim, name="output") - def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> Optional[tf.Tensor]: + def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> tf.Tensor | None: img_mask = tf.zeros((height, width)) height_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1)) width_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1)) @@ -725,7 +727,7 @@ class TFSwinLayer(tf.keras.layers.Layer): self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, training: bool = False, ) -> tf.Tensor: @@ -832,7 +834,7 @@ class TFSwinStage(tf.keras.layers.Layer): self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor, ...]: @@ -886,7 +888,7 @@ class TFSwinEncoder(tf.keras.layers.Layer): self, hidden_states: tf.Tensor, input_dimensions: Tuple[int, int], - head_mask: Optional[tf.Tensor] = None, + head_mask: tf.Tensor | None = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -1128,9 +1130,9 @@ class TFSwinMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1210,9 +1212,9 @@ class TFSwinModel(TFSwinPreTrainedModel): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1317,9 +1319,9 @@ class TFSwinForMaskedImageModeling(TFSwinPreTrainedModel): @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - bool_masked_pos: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + bool_masked_pos: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1449,9 +1451,9 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificati @unpack_inputs def call( self, - pixel_values: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - labels: Optional[tf.Tensor] = None, + pixel_values: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + labels: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index ec3e67db26..012f0c41b0 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 T5 model.""" + +from __future__ import annotations + import copy import itertools import math @@ -1148,16 +1151,16 @@ class TFT5Model(TFT5PreTrainedModel): @replace_return_docstrings(output_type=TFSeq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1327,17 +1330,17 @@ class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModeling @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_outputs: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + encoder_outputs: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1548,10 +1551,10 @@ class TFT5EncoderModel(TFT5PreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index f876730b09..b17fddc327 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -14,6 +14,9 @@ # limitations under the License. """TF 2.0 TAPAS model.""" + +from __future__ import annotations + import enum import math from dataclasses import dataclass @@ -132,11 +135,11 @@ class TFTableQuestionAnsweringOutput(ModelOutput): the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - logits_aggregation: Optional[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + logits_aggregation: tf.Tensor | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None class TFTapasEmbeddings(tf.keras.layers.Layer): @@ -486,9 +489,9 @@ class TFTapasLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -565,9 +568,9 @@ class TFTapasEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -758,12 +761,12 @@ class TFTapasMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -984,12 +987,12 @@ class TFTapasModel(TFTapasPreTrainedModel): @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1069,16 +1072,16 @@ class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss): @replace_return_docstrings(output_type=TFMaskedLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1281,21 +1284,21 @@ class TFTapasForQuestionAnswering(TFTapasPreTrainedModel): @replace_return_docstrings(output_type=TFTableQuestionAnsweringOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - table_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - aggregation_labels: Optional[Union[np.ndarray, tf.Tensor]] = None, - float_answer: Optional[Union[np.ndarray, tf.Tensor]] = None, - numeric_values: Optional[Union[np.ndarray, tf.Tensor]] = None, - numeric_values_scale: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + table_mask: np.ndarray | tf.Tensor | None = None, + aggregation_labels: np.ndarray | tf.Tensor | None = None, + float_answer: np.ndarray | tf.Tensor | None = None, + numeric_values: np.ndarray | tf.Tensor | None = None, + numeric_values_scale: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTableQuestionAnsweringOutput, Tuple[tf.Tensor]]: r""" @@ -1606,16 +1609,16 @@ class TFTapasForSequenceClassification(TFTapasPreTrainedModel, TFSequenceClassif @replace_return_docstrings(output_type=TFSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 93af216511..decf18b8a7 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -17,6 +17,8 @@ TF 2.0 Transformer XL model. """ +from __future__ import annotations + from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -541,14 +543,14 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ): # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library @@ -722,8 +724,8 @@ class TFTransfoXLModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -755,8 +757,8 @@ class TFTransfoXLLMHeadModelOutput(ModelOutput): prediction_scores: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -786,11 +788,11 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None TRANSFO_XL_START_DOCSTRING = r""" @@ -892,10 +894,10 @@ class TFTransfoXLModel(TFTransfoXLPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -971,14 +973,14 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ): if input_ids is not None: @@ -1075,14 +1077,14 @@ class TFTransfoXLForSequenceClassification(TFTransfoXLPreTrainedModel, TFSequenc ) def call( self, - input_ids: Optional[TFModelInputType] = None, - mems: Optional[List[tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + mems: List[tf.Tensor] | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[Tuple, TFTransfoXLSequenceClassifierOutputWithPast]: r""" diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 439c5d668a..ad39a0ae82 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -15,6 +15,8 @@ """ Classes to support TF Vision-Encoder-Text-Decoder architectures""" +from __future__ import annotations + import re import warnings from typing import Optional, Tuple, Union @@ -492,13 +494,13 @@ class TFVisionEncoderDecoderModel(TFPreTrainedModel, TFCausalLanguageModelingLos @replace_return_docstrings(output_type=TFSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: np.ndarray | tf.Tensor | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - decoder_inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + decoder_inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py index a2211f245e..6e0c65a813 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py @@ -15,6 +15,8 @@ """TensorFlow VisionTextDualEncoder model.""" +from __future__ import annotations + import re from typing import Optional, Tuple, Union @@ -340,12 +342,12 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel): @replace_return_docstrings(output_type=TFCLIPOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[tf.Tensor] = None, - pixel_values: Optional[tf.Tensor] = None, - attention_mask: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, + input_ids: tf.Tensor | None = None, + pixel_values: tf.Tensor | None = None, + attention_mask: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, return_loss: Optional[bool] = None, - token_type_ids: Optional[tf.Tensor] = None, + token_type_ids: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index 6d0c579a43..6a07719c91 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -15,6 +15,8 @@ """ TF 2.0 ViT model.""" +from __future__ import annotations + import collections.abc import math from typing import Dict, Optional, Tuple, Union @@ -487,8 +489,8 @@ class TFViTMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, @@ -675,8 +677,8 @@ class TFViTModel(TFViTPreTrainedModel): ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, @@ -766,13 +768,13 @@ class TFViTForImageClassification(TFViTPreTrainedModel, TFSequenceClassification ) def call( self, - pixel_values: Optional[TFModelInputType] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + pixel_values: TFModelInputType | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, interpolate_pos_encoding: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index afb40478cc..5f5c1a6830 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -14,6 +14,9 @@ # limitations under the License. """ TF 2.0 ViT MAE (masked autoencoder) model.""" + +from __future__ import annotations + import collections.abc import math from copy import deepcopy @@ -74,8 +77,8 @@ class TFViTMAEModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None mask: tf.Tensor = None ids_restore: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -97,8 +100,8 @@ class TFViTMAEDecoderOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -125,12 +128,12 @@ class TFViTMAEForPreTrainingOutput(ModelOutput): the self-attention heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None mask: tf.Tensor = None ids_restore: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None def get_2d_sincos_pos_embed(embed_dim, grid_size, add_cls_token=False): @@ -232,7 +235,7 @@ class TFViTMAEEmbeddings(tf.keras.layers.Layer): super().build(input_shape) - def random_masking(self, sequence: tf.Tensor, noise: Optional[tf.Tensor] = None): + def random_masking(self, sequence: tf.Tensor, noise: tf.Tensor | None = None): """ Perform per-sample random masking by per-sample shuffling. Per-sample shuffling is done by argsort random noise. @@ -639,9 +642,9 @@ class TFViTMAEMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -816,9 +819,9 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel): @replace_return_docstrings(output_type=TFViTMAEModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1107,9 +1110,9 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel): @replace_return_docstrings(output_type=TFViTMAEForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def call( self, - pixel_values: Optional[TFModelInputType] = None, + pixel_values: TFModelInputType | None = None, noise: tf.Tensor = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index dcc59d7f73..3ee16127b3 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -14,6 +14,9 @@ # limitations under the License. """ TensorFlow Wav2Vec2 model.""" + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union @@ -84,8 +87,8 @@ class TFWav2Vec2BaseModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None extract_features: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None def _sample_without_replacement(distribution, num_samples): @@ -673,12 +676,12 @@ class TFWav2Vec2Attention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -841,7 +844,7 @@ class TFWav2Vec2EncoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -884,7 +887,7 @@ class TFWav2Vec2EncoderLayerStableLayerNorm(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -917,7 +920,7 @@ class TFWav2Vec2Encoder(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -984,7 +987,7 @@ class TFWav2Vec2EncoderStableLayerNorm(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = False, output_hidden_states: Optional[bool] = False, return_dict: Optional[bool] = True, @@ -1074,7 +1077,7 @@ class TFWav2Vec2MainLayer(tf.keras.layers.Layer): return input_lengths - def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: Optional[tf.Tensor] = None): + def _mask_hidden_states(self, hidden_states: tf.Tensor, mask_time_indices: tf.Tensor | None = None): """ Masks extracted features along time axis and/or along feature axis according to [SpecAugment](https://arxiv.org/abs/1904.08779). @@ -1122,11 +1125,11 @@ class TFWav2Vec2MainLayer(tf.keras.layers.Layer): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1367,11 +1370,11 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -1473,13 +1476,13 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - token_type_ids: Optional[tf.Tensor] = None, - position_ids: Optional[tf.Tensor] = None, - head_mask: Optional[tf.Tensor] = None, - inputs_embeds: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, + token_type_ids: tf.Tensor | None = None, + position_ids: tf.Tensor | None = None, + head_mask: tf.Tensor | None = None, + inputs_embeds: tf.Tensor | None = None, output_attentions: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, training: Optional[bool] = False, @@ -1639,11 +1642,11 @@ class TFWav2Vec2ForSequenceClassification(TFWav2Vec2PreTrainedModel): def call( self, input_values: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, + attention_mask: tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[tf.Tensor] = None, + labels: tf.Tensor | None = None, training: bool = False, ): return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 0d2a2682cc..11168df3f9 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -15,6 +15,8 @@ """ TensorFlow Whisper model.""" +from __future__ import annotations + import math import random from typing import Dict, Optional, Tuple, Union @@ -171,12 +173,12 @@ class TFWhisperAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -376,12 +378,12 @@ class TFWhisperDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -1119,13 +1121,13 @@ class TFWhisperModel(TFWhisperPreTrainedModel): @unpack_inputs def call( self, - input_features: Optional[TFModelInputType] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, decoder_inputs_embeds: Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None, @@ -1234,17 +1236,17 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua @unpack_inputs def call( self, - input_features: Optional[TFModelInputType] = None, - decoder_input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - decoder_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_features: TFModelInputType | None = None, + decoder_input_ids: np.ndarray | tf.Tensor | None = None, + decoder_attention_mask: np.ndarray | tf.Tensor | None = None, + decoder_position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + decoder_head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, encoder_outputs: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, decoder_inputs_embeds: Optional[Tuple[Union[np.ndarray, tf.Tensor]]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 1a0146bf19..236720ae49 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -15,6 +15,8 @@ """ TF 2.0 XGLM model.""" +from __future__ import annotations + import math import random from typing import Any, Optional, Tuple, Union @@ -185,12 +187,12 @@ class TFXGLMAttention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training: Optional[bool] = False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -337,12 +339,12 @@ class TFXGLMDecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training: Optional[bool] = False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ @@ -456,13 +458,13 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): def _prepare_decoder_attention_mask( self, - attention_mask: Optional[tf.Tensor], + attention_mask: tf.Tensor | None, input_shape: tf.TensorShape, past_key_values_length: int, ) -> tf.Tensor: # create causal mask # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - combined_attention_mask: Optional[tf.Tensor] = None + combined_attention_mask: tf.Tensor | None = None if input_shape[-1] > 1: combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length) @@ -476,7 +478,7 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): return combined_attention_mask - def embed_positions(self, position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None) -> tf.Tensor: + def embed_positions(self, position_ids: np.ndarray | tf.Tensor | None = None) -> tf.Tensor: position_ids += self.offset positions = tf.gather(self._embed_positions_weights, position_ids, axis=0) return positions @@ -484,15 +486,15 @@ class TFXGLMMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -785,15 +787,15 @@ class TFXGLMModel(TFXGLMPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -905,16 +907,16 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - cross_attn_head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + cross_attn_head_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + labels: np.ndarray | tf.Tensor | None = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index da9bd1c603..1815b27c85 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -16,6 +16,9 @@ TF 2.0 XLM model. """ + +from __future__ import annotations + import itertools import warnings from dataclasses import dataclass @@ -558,8 +561,8 @@ class TFXLMWithLMHeadModelOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None XLM_START_DOCSTRING = r""" @@ -833,15 +836,15 @@ class TFXLMWithLMHeadModel(TFXLMPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -904,19 +907,19 @@ class TFXLMForSequenceClassification(TFXLMPreTrainedModel, TFSequenceClassificat ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1010,19 +1013,19 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: if input_ids is not None: @@ -1133,19 +1136,19 @@ class TFXLMForTokenClassification(TFXLMPreTrainedModel, TFTokenClassificationLos ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1217,20 +1220,20 @@ class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel, TFQuestionAnsweringL ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - langs: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - lengths: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + langs: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + lengths: np.ndarray | tf.Tensor | None = None, cache: Optional[Dict[str, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py index 2f51c032f1..ae2bae7d7a 100644 --- a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py @@ -15,6 +15,9 @@ # limitations under the License. """ TF 2.0 XLM-RoBERTa model.""" + +from __future__ import annotations + import math import warnings from typing import Optional, Tuple, Union @@ -520,9 +523,9 @@ class TFXLMRobertaLayer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -599,9 +602,9 @@ class TFXLMRobertaEncoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -699,14 +702,14 @@ class TFXLMRobertaMainLayer(tf.keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.call def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -917,14 +920,14 @@ class TFXLMRobertaModel(TFXLMRobertaPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1077,16 +1080,16 @@ class TFXLMRobertaForMaskedLM(TFXLMRobertaPreTrainedModel, TFMaskedLanguageModel ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1179,20 +1182,20 @@ class TFXLMRobertaForCausalLM(TFXLMRobertaPreTrainedModel, TFCausalLanguageModel ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1332,16 +1335,16 @@ class TFXLMRobertaForSequenceClassification(TFXLMRobertaPreTrainedModel, TFSeque ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1429,16 +1432,16 @@ class TFXLMRobertaForMultipleChoice(TFXLMRobertaPreTrainedModel, TFMultipleChoic ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1546,16 +1549,16 @@ class TFXLMRobertaForTokenClassification(TFXLMRobertaPreTrainedModel, TFTokenCla ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1632,17 +1635,17 @@ class TFXLMRobertaForQuestionAnswering(TFXLMRobertaPreTrainedModel, TFQuestionAn ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 52538ced57..1d8a6692c0 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -17,6 +17,9 @@ TF 2.0 XLNet model. """ + +from __future__ import annotations + import warnings from dataclasses import dataclass from typing import List, Optional, Tuple, Union @@ -195,9 +198,9 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer): attn_mask_g, r, seg_mat, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + mems: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ): @@ -369,9 +372,9 @@ class TFXLNetLayer(tf.keras.layers.Layer): attn_mask, pos_emb, seg_mat, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + mems: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = False, training: bool = False, ): @@ -582,15 +585,15 @@ class TFXLNetMainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -824,9 +827,9 @@ class TFXLNetModelOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -859,11 +862,11 @@ class TFXLNetLMHeadModelOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -893,11 +896,11 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -927,11 +930,11 @@ class TFXLNetForTokenClassificationOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -963,11 +966,11 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None logits: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None @dataclass @@ -999,12 +1002,12 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput): heads. """ - loss: Optional[tf.Tensor] = None + loss: tf.Tensor | None = None start_logits: tf.Tensor = None end_logits: tf.Tensor = None - mems: Optional[List[tf.Tensor]] = None - hidden_states: Optional[Tuple[tf.Tensor]] = None - attentions: Optional[Tuple[tf.Tensor]] = None + mems: List[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor] | None = None + attentions: Tuple[tf.Tensor] | None = None XLNET_START_DOCSTRING = r""" @@ -1140,15 +1143,15 @@ class TFXLNetModel(TFXLNetPreTrainedModel): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1249,20 +1252,20 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss): @replace_return_docstrings(output_type=TFXLNetLMHeadModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetLMHeadModelOutput, Tuple[tf.Tensor]]: r""" @@ -1379,20 +1382,20 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForSequenceClassificationOutput, Tuple[tf.Tensor]]: r""" @@ -1484,20 +1487,20 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss): ) def call( self, - input_ids: Optional[TFModelInputType] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForMultipleChoiceOutput, Tuple[tf.Tensor]]: r""" @@ -1604,20 +1607,20 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForTokenClassificationOutput, Tuple[tf.Tensor]]: r""" @@ -1689,21 +1692,21 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - mems: Optional[Union[np.ndarray, tf.Tensor]] = None, - perm_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - target_mapping: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - input_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + mems: np.ndarray | tf.Tensor | None = None, + perm_mask: np.ndarray | tf.Tensor | None = None, + target_mapping: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + input_mask: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, use_mems: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: bool = False, ) -> Union[TFXLNetForQuestionAnsweringSimpleOutput, Tuple[tf.Tensor]]: r""" diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index ffe5e7de95..80e2d8ed1e 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -386,9 +386,9 @@ class TF{{cookiecutter.camelcase_modelname}}Layer(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_value: Optional[Tuple[tf.Tensor]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_value: Tuple[tf.Tensor] | None, output_attentions: bool, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -465,9 +465,9 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer): hidden_states: tf.Tensor, attention_mask: tf.Tensor, head_mask: tf.Tensor, - encoder_hidden_states: Optional[tf.Tensor], - encoder_attention_mask: Optional[tf.Tensor], - past_key_values: Optional[Tuple[Tuple[tf.Tensor]]], + encoder_hidden_states: tf.Tensor | None, + encoder_attention_mask: tf.Tensor | None, + past_key_values: Tuple[Tuple[tf.Tensor]] | None, use_cache: Optional[bool], output_attentions: bool, output_hidden_states: bool, @@ -639,14 +639,14 @@ class TF{{cookiecutter.camelcase_modelname}}MainLayer(tf.keras.layers.Layer): @unpack_inputs def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -937,14 +937,14 @@ class TF{{cookiecutter.camelcase_modelname}}Model(TF{{cookiecutter.camelcase_mod ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1038,16 +1038,16 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMaskedLMOutput, Tuple[tf.Tensor]]: r""" @@ -1129,20 +1129,20 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_hidden_states: Optional[Union[np.ndarray, tf.Tensor]] = None, - encoder_attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, + encoder_hidden_states: np.ndarray | tf.Tensor | None = None, + encoder_attention_mask: np.ndarray | tf.Tensor | None = None, past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFCausalLMOutputWithCrossAttentions, Tuple[tf.Tensor]]: r""" @@ -1274,16 +1274,16 @@ class TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification(TF{{cookie ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1362,16 +1362,16 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFMultipleChoiceModelOutput, Tuple[tf.Tensor]]: r""" @@ -1487,16 +1487,16 @@ class TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(TF{{cookiecut ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + labels: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]: r""" @@ -1566,17 +1566,17 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte ) def call( self, - input_ids: Optional[TFModelInputType] = None, - attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, - head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, - inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + input_ids: TFModelInputType | None = None, + attention_mask: np.ndarray | tf.Tensor | None = None, + token_type_ids: np.ndarray | tf.Tensor | None = None, + position_ids: np.ndarray | tf.Tensor | None = None, + head_mask: np.ndarray | tf.Tensor | None = None, + inputs_embeds: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - start_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, - end_positions: Optional[Union[np.ndarray, tf.Tensor]] = None, + start_positions: np.ndarray | tf.Tensor | None = None, + end_positions: np.ndarray | tf.Tensor | None = None, training: Optional[bool] = False, ) -> Union[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor]]: r""" @@ -1777,12 +1777,12 @@ class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - key_value_states: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[Tuple[tf.Tensor]]] = None, - attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, + key_value_states: tf.Tensor | None = None, + past_key_value: Tuple[Tuple[tf.Tensor]] | None = None, + attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, training=False, - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor | None]: """Input shape: Batch x Time x Channel""" # if key_value_states are provided this layer is used as a cross-attention layer @@ -1962,12 +1962,12 @@ class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(tf.keras.layers.Layer): def call( self, hidden_states, - attention_mask: Optional[tf.Tensor] = None, - encoder_hidden_states: Optional[tf.Tensor] = None, - encoder_attention_mask: Optional[tf.Tensor] = None, - layer_head_mask: Optional[tf.Tensor] = None, - cross_attn_layer_head_mask: Optional[tf.Tensor] = None, - past_key_value: Optional[Tuple[tf.Tensor]] = None, + attention_mask: tf.Tensor | None = None, + encoder_hidden_states: tf.Tensor | None = None, + encoder_attention_mask: tf.Tensor | None = None, + layer_head_mask: tf.Tensor | None = None, + cross_attn_layer_head_mask: tf.Tensor | None = None, + past_key_value: Tuple[tf.Tensor] | None = None, training=False, ) -> Tuple[tf.Tensor, tf.Tensor, Tuple[Tuple[tf.Tensor]]]: """ diff --git a/tests/generation/test_tf_logits_process.py b/tests/generation/test_tf_logits_process.py index a1f665c9a7..e87c843d9c 100644 --- a/tests/generation/test_tf_logits_process.py +++ b/tests/generation/test_tf_logits_process.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/generation/test_tf_utils.py b/tests/generation/test_tf_utils.py index 6fdad1ef63..186e0c8d43 100644 --- a/tests/generation/test_tf_utils.py +++ b/tests/generation/test_tf_utils.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import tempfile import unittest diff --git a/tests/models/albert/test_modeling_tf_albert.py b/tests/models/albert/test_modeling_tf_albert.py index 104fb09252..97b073e850 100644 --- a/tests/models/albert/test_modeling_tf_albert.py +++ b/tests/models/albert/test_modeling_tf_albert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import AlbertConfig, is_tf_available diff --git a/tests/models/auto/test_modeling_tf_auto.py b/tests/models/auto/test_modeling_tf_auto.py index 1a355d88bb..c8754ca427 100644 --- a/tests/models/auto/test_modeling_tf_auto.py +++ b/tests/models/auto/test_modeling_tf_auto.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import copy import tempfile import unittest diff --git a/tests/models/auto/test_modeling_tf_pytorch.py b/tests/models/auto/test_modeling_tf_pytorch.py index c60b8fc2f5..3e213f2956 100644 --- a/tests/models/auto/test_modeling_tf_pytorch.py +++ b/tests/models/auto/test_modeling_tf_pytorch.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available, is_torch_available diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py index 0f0f8f9793..c113011c56 100644 --- a/tests/models/bart/test_modeling_tf_bart.py +++ b/tests/models/bart/test_modeling_tf_bart.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import copy import tempfile import unittest diff --git a/tests/models/bert/test_modeling_tf_bert.py b/tests/models/bert/test_modeling_tf_bert.py index 59521acec3..a8a2159fe1 100644 --- a/tests/models/bert/test_modeling_tf_bert.py +++ b/tests/models/bert/test_modeling_tf_bert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import BertConfig, is_tf_available diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py index 2db959e9f7..5fd6faefec 100644 --- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py index 67a4f7ad7b..5bc5c4afe9 100644 --- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available diff --git a/tests/models/blip/test_modeling_tf_blip.py b/tests/models/blip/test_modeling_tf_blip.py index 3bb7b87edb..af7533c698 100644 --- a/tests/models/blip/test_modeling_tf_blip.py +++ b/tests/models/blip/test_modeling_tf_blip.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow Blip model. """ +from __future__ import annotations + import inspect import tempfile import unittest diff --git a/tests/models/blip/test_modeling_tf_blip_text.py b/tests/models/blip/test_modeling_tf_blip_text.py index 261056e918..2733a9fa6a 100644 --- a/tests/models/blip/test_modeling_tf_blip_text.py +++ b/tests/models/blip/test_modeling_tf_blip_text.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Testing suite for the TensorFlow Blip model. """ +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/models/bort/test_modeling_tf_bort.py b/tests/models/bort/test_modeling_tf_bort.py index 8053afbd30..35abe53d89 100644 --- a/tests/models/bort/test_modeling_tf_bort.py +++ b/tests/models/bort/test_modeling_tf_bort.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/camembert/test_modeling_tf_camembert.py b/tests/models/camembert/test_modeling_tf_camembert.py index dc54252685..425bdbc4b0 100644 --- a/tests/models/camembert/test_modeling_tf_camembert.py +++ b/tests/models/camembert/test_modeling_tf_camembert.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py index 6cd20a47a7..10b9954fc8 100644 --- a/tests/models/clip/test_modeling_tf_clip.py +++ b/tests/models/clip/test_modeling_tf_clip.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow CLIP model. """ +from __future__ import annotations + import inspect import os import tempfile diff --git a/tests/models/convbert/test_modeling_tf_convbert.py b/tests/models/convbert/test_modeling_tf_convbert.py index 0c259110e7..84ed4de818 100644 --- a/tests/models/convbert/test_modeling_tf_convbert.py +++ b/tests/models/convbert/test_modeling_tf_convbert.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import tempfile import unittest diff --git a/tests/models/convnext/test_modeling_tf_convnext.py b/tests/models/convnext/test_modeling_tf_convnext.py index 72981c09d6..8d049cf9f5 100644 --- a/tests/models/convnext/test_modeling_tf_convnext.py +++ b/tests/models/convnext/test_modeling_tf_convnext.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow ConvNext model. """ +from __future__ import annotations + import inspect import unittest from typing import List, Tuple diff --git a/tests/models/ctrl/test_modeling_tf_ctrl.py b/tests/models/ctrl/test_modeling_tf_ctrl.py index c71c96bc9d..4d94a97828 100644 --- a/tests/models/ctrl/test_modeling_tf_ctrl.py +++ b/tests/models/ctrl/test_modeling_tf_ctrl.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import CTRLConfig, is_tf_available diff --git a/tests/models/cvt/test_modeling_tf_cvt.py b/tests/models/cvt/test_modeling_tf_cvt.py index f0f7b9c5d8..78d95931b3 100644 --- a/tests/models/cvt/test_modeling_tf_cvt.py +++ b/tests/models/cvt/test_modeling_tf_cvt.py @@ -1,6 +1,8 @@ """ Testing suite for the Tensorflow CvT model. """ +from __future__ import annotations + import inspect import unittest from math import floor diff --git a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py index dfa890d25a..6a30c83eba 100644 --- a/tests/models/data2vec/test_modeling_tf_data2vec_vision.py +++ b/tests/models/data2vec/test_modeling_tf_data2vec_vision.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow Data2VecVision model. """ +from __future__ import annotations + import collections.abc import inspect import unittest diff --git a/tests/models/deberta/test_modeling_tf_deberta.py b/tests/models/deberta/test_modeling_tf_deberta.py index 424d9e0b2b..9b69d55001 100644 --- a/tests/models/deberta/test_modeling_tf_deberta.py +++ b/tests/models/deberta/test_modeling_tf_deberta.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import DebertaConfig, is_tf_available diff --git a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py index 60391635ee..96ebe375d9 100644 --- a/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py +++ b/tests/models/deberta_v2/test_modeling_tf_deberta_v2.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import DebertaV2Config, is_tf_available diff --git a/tests/models/deit/test_modeling_tf_deit.py b/tests/models/deit/test_modeling_tf_deit.py index 223d164d4a..b350a5d546 100644 --- a/tests/models/deit/test_modeling_tf_deit.py +++ b/tests/models/deit/test_modeling_tf_deit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow DeiT model. """ +from __future__ import annotations + import inspect import unittest @@ -242,7 +244,7 @@ class TFDeiTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: - if model_class.__name__ == "DeiTForImageClassificationWithTeacher": + if "labels" in inputs_dict and "labels" not in inspect.signature(model_class.call).parameters: del inputs_dict["labels"] return inputs_dict diff --git a/tests/models/distilbert/test_modeling_tf_distilbert.py b/tests/models/distilbert/test_modeling_tf_distilbert.py index 1f4f3c2b46..4e96c90976 100644 --- a/tests/models/distilbert/test_modeling_tf_distilbert.py +++ b/tests/models/distilbert/test_modeling_tf_distilbert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import DistilBertConfig, is_tf_available diff --git a/tests/models/dpr/test_modeling_tf_dpr.py b/tests/models/dpr/test_modeling_tf_dpr.py index 64dea041b5..f788a51633 100644 --- a/tests/models/dpr/test_modeling_tf_dpr.py +++ b/tests/models/dpr/test_modeling_tf_dpr.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/electra/test_modeling_tf_electra.py b/tests/models/electra/test_modeling_tf_electra.py index ae092e8a17..fe60c56271 100644 --- a/tests/models/electra/test_modeling_tf_electra.py +++ b/tests/models/electra/test_modeling_tf_electra.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import ElectraConfig, is_tf_available diff --git a/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py b/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py index 76ebd687f7..aa22e961f6 100644 --- a/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py +++ b/tests/models/encoder_decoder/test_modeling_tf_encoder_decoder.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import os import tempfile diff --git a/tests/models/esm/test_modeling_tf_esm.py b/tests/models/esm/test_modeling_tf_esm.py index dc9d430d07..d06e3c59ba 100644 --- a/tests/models/esm/test_modeling_tf_esm.py +++ b/tests/models/esm/test_modeling_tf_esm.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import EsmConfig, is_tf_available diff --git a/tests/models/flaubert/test_modeling_tf_flaubert.py b/tests/models/flaubert/test_modeling_tf_flaubert.py index 6b7f4fc031..b751445d12 100644 --- a/tests/models/flaubert/test_modeling_tf_flaubert.py +++ b/tests/models/flaubert/test_modeling_tf_flaubert.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/funnel/test_modeling_tf_funnel.py b/tests/models/funnel/test_modeling_tf_funnel.py index 6780605e89..5aea7e4309 100644 --- a/tests/models/funnel/test_modeling_tf_funnel.py +++ b/tests/models/funnel/test_modeling_tf_funnel.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import FunnelConfig, is_tf_available diff --git a/tests/models/gpt2/test_modeling_tf_gpt2.py b/tests/models/gpt2/test_modeling_tf_gpt2.py index 7171997546..c69ab86337 100644 --- a/tests/models/gpt2/test_modeling_tf_gpt2.py +++ b/tests/models/gpt2/test_modeling_tf_gpt2.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import GPT2Config, is_tf_available diff --git a/tests/models/gptj/test_modeling_tf_gptj.py b/tests/models/gptj/test_modeling_tf_gptj.py index 3aa63d2790..0e4dc9f583 100644 --- a/tests/models/gptj/test_modeling_tf_gptj.py +++ b/tests/models/gptj/test_modeling_tf_gptj.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import AutoTokenizer, GPTJConfig, is_tf_available diff --git a/tests/models/groupvit/test_modeling_tf_groupvit.py b/tests/models/groupvit/test_modeling_tf_groupvit.py index bd499a50fb..a80ef606e5 100644 --- a/tests/models/groupvit/test_modeling_tf_groupvit.py +++ b/tests/models/groupvit/test_modeling_tf_groupvit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow GroupViT model. """ +from __future__ import annotations + import inspect import os import random diff --git a/tests/models/hubert/test_modeling_tf_hubert.py b/tests/models/hubert/test_modeling_tf_hubert.py index a48ed0634e..0b8e1e2df9 100644 --- a/tests/models/hubert/test_modeling_tf_hubert.py +++ b/tests/models/hubert/test_modeling_tf_hubert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import inspect import math diff --git a/tests/models/layoutlm/test_modeling_tf_layoutlm.py b/tests/models/layoutlm/test_modeling_tf_layoutlm.py index 95e24023bb..2d134f23d4 100644 --- a/tests/models/layoutlm/test_modeling_tf_layoutlm.py +++ b/tests/models/layoutlm/test_modeling_tf_layoutlm.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py index df103194ab..a1e2cd5908 100644 --- a/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_modeling_tf_layoutlmv3.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow LayoutLMv3 model. """ +from __future__ import annotations + import copy import inspect import unittest diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py index 7bac1ced83..8735aeb721 100644 --- a/tests/models/led/test_modeling_tf_led.py +++ b/tests/models/led/test_modeling_tf_led.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import LEDConfig, is_tf_available diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py index b5452bc80a..dcdd68b18f 100644 --- a/tests/models/longformer/test_modeling_tf_longformer.py +++ b/tests/models/longformer/test_modeling_tf_longformer.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/lxmert/test_modeling_tf_lxmert.py b/tests/models/lxmert/test_modeling_tf_lxmert.py index cd2095f693..411de960f3 100644 --- a/tests/models/lxmert/test_modeling_tf_lxmert.py +++ b/tests/models/lxmert/test_modeling_tf_lxmert.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import os import tempfile import unittest diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py index 16b19b0f97..5a624fda9a 100644 --- a/tests/models/marian/test_modeling_tf_marian.py +++ b/tests/models/marian/test_modeling_tf_marian.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import tempfile import unittest import warnings diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py index b143fc6877..6c36d705e8 100644 --- a/tests/models/mbart/test_modeling_tf_mbart.py +++ b/tests/models/mbart/test_modeling_tf_mbart.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import tempfile import unittest diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py index 69d2fc6768..293126ab61 100644 --- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py +++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import MobileBertConfig, is_tf_available diff --git a/tests/models/mobilevit/test_modeling_tf_mobilevit.py b/tests/models/mobilevit/test_modeling_tf_mobilevit.py index e4a956dff2..37d7db39e6 100644 --- a/tests/models/mobilevit/test_modeling_tf_mobilevit.py +++ b/tests/models/mobilevit/test_modeling_tf_mobilevit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow MobileViT model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/mpnet/test_modeling_tf_mpnet.py b/tests/models/mpnet/test_modeling_tf_mpnet.py index 4936a52899..381b6e81dd 100644 --- a/tests/models/mpnet/test_modeling_tf_mpnet.py +++ b/tests/models/mpnet/test_modeling_tf_mpnet.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import MPNetConfig, is_tf_available diff --git a/tests/models/mt5/test_modeling_tf_mt5.py b/tests/models/mt5/test_modeling_tf_mt5.py index 0c934f0314..facb63dd79 100644 --- a/tests/models/mt5/test_modeling_tf_mt5.py +++ b/tests/models/mt5/test_modeling_tf_mt5.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/openai/test_modeling_tf_openai.py b/tests/models/openai/test_modeling_tf_openai.py index a4cf71bf1a..a82da911a4 100644 --- a/tests/models/openai/test_modeling_tf_openai.py +++ b/tests/models/openai/test_modeling_tf_openai.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import OpenAIGPTConfig, is_tf_available diff --git a/tests/models/opt/test_modeling_tf_opt.py b/tests/models/opt/test_modeling_tf_opt.py index 0ae3411812..85514c9d72 100644 --- a/tests/models/opt/test_modeling_tf_opt.py +++ b/tests/models/opt/test_modeling_tf_opt.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest import numpy as np diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py index 6816cc34ef..b34a3dcfb5 100644 --- a/tests/models/pegasus/test_modeling_tf_pegasus.py +++ b/tests/models/pegasus/test_modeling_tf_pegasus.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import tempfile import unittest diff --git a/tests/models/rag/test_modeling_tf_rag.py b/tests/models/rag/test_modeling_tf_rag.py index 4a0e4176b4..b4720f7c7f 100644 --- a/tests/models/rag/test_modeling_tf_rag.py +++ b/tests/models/rag/test_modeling_tf_rag.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import os import shutil diff --git a/tests/models/regnet/test_modeling_tf_regnet.py b/tests/models/regnet/test_modeling_tf_regnet.py index f5f5cfd4b9..cee3995d21 100644 --- a/tests/models/regnet/test_modeling_tf_regnet.py +++ b/tests/models/regnet/test_modeling_tf_regnet.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow RegNet model. """ +from __future__ import annotations + import inspect import unittest from typing import List, Tuple diff --git a/tests/models/rembert/test_modeling_tf_rembert.py b/tests/models/rembert/test_modeling_tf_rembert.py index 7ab71e9c6e..e70bd7033f 100644 --- a/tests/models/rembert/test_modeling_tf_rembert.py +++ b/tests/models/rembert/test_modeling_tf_rembert.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import RemBertConfig, is_tf_available diff --git a/tests/models/resnet/test_modeling_tf_resnet.py b/tests/models/resnet/test_modeling_tf_resnet.py index 0a8ccc0041..e6f8d121c2 100644 --- a/tests/models/resnet/test_modeling_tf_resnet.py +++ b/tests/models/resnet/test_modeling_tf_resnet.py @@ -15,6 +15,8 @@ """ Testing suite for the Tensorflow ResNet model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/roberta/test_modeling_tf_roberta.py b/tests/models/roberta/test_modeling_tf_roberta.py index efa54ba45f..3d7b6953c0 100644 --- a/tests/models/roberta/test_modeling_tf_roberta.py +++ b/tests/models/roberta/test_modeling_tf_roberta.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import RobertaConfig, is_tf_available diff --git a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py index 6de20c0e1d..4e1bd2e319 100644 --- a/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py +++ b/tests/models/roberta_prelayernorm/test_modeling_tf_roberta_prelayernorm.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import RobertaPreLayerNormConfig, is_tf_available diff --git a/tests/models/roformer/test_modeling_tf_roformer.py b/tests/models/roformer/test_modeling_tf_roformer.py index 0a632e39a2..52c630e2be 100644 --- a/tests/models/roformer/test_modeling_tf_roformer.py +++ b/tests/models/roformer/test_modeling_tf_roformer.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import RoFormerConfig, is_tf_available diff --git a/tests/models/sam/test_modeling_tf_sam.py b/tests/models/sam/test_modeling_tf_sam.py index fc8dd79765..4e918a1cd1 100644 --- a/tests/models/sam/test_modeling_tf_sam.py +++ b/tests/models/sam/test_modeling_tf_sam.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow SAM model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py index 79c58ebe40..b831e8ddbc 100644 --- a/tests/models/segformer/test_modeling_tf_segformer.py +++ b/tests/models/segformer/test_modeling_tf_segformer.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow SegFormer model. """ +from __future__ import annotations + import inspect import unittest from typing import List, Tuple diff --git a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py index 75789fd6d9..b283b4478b 100644 --- a/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_tf_speech_to_text.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow Speech2Text model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/swin/test_modeling_tf_swin.py b/tests/models/swin/test_modeling_tf_swin.py index 32de917a11..a898d22fb1 100644 --- a/tests/models/swin/test_modeling_tf_swin.py +++ b/tests/models/swin/test_modeling_tf_swin.py @@ -15,6 +15,8 @@ """ Testing suite for the TF 2.0 Swin model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py index a1d784ae2f..7a75f51cd7 100644 --- a/tests/models/t5/test_modeling_tf_t5.py +++ b/tests/models/t5/test_modeling_tf_t5.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import T5Config, is_tf_available diff --git a/tests/models/tapas/test_modeling_tf_tapas.py b/tests/models/tapas/test_modeling_tf_tapas.py index c3cc5fae3a..ce98394cb8 100644 --- a/tests/models/tapas/test_modeling_tf_tapas.py +++ b/tests/models/tapas/test_modeling_tf_tapas.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import copy import unittest diff --git a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py index 47880013b9..ac820ea8fa 100644 --- a/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py +++ b/tests/models/transfo_xl/test_modeling_tf_transfo_xl.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import random import unittest diff --git a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py index 1e594f5de5..04062014b8 100644 --- a/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py +++ b/tests/models/vision_encoder_decoder/test_modeling_tf_vision_encoder_decoder.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow VisionEncoderDecoder model. """ +from __future__ import annotations + import copy import os import tempfile diff --git a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py index 696a302722..1f27f831e8 100644 --- a/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py +++ b/tests/models/vision_text_dual_encoder/test_modeling_tf_vision_text_dual_encoder.py @@ -15,6 +15,8 @@ """ Testing suite for the PyTorch VisionTextDualEncoder model. """ +from __future__ import annotations + import collections import tempfile import unittest diff --git a/tests/models/vit/test_modeling_tf_vit.py b/tests/models/vit/test_modeling_tf_vit.py index 111223de32..72ca1b19dc 100644 --- a/tests/models/vit/test_modeling_tf_vit.py +++ b/tests/models/vit/test_modeling_tf_vit.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow ViT model. """ +from __future__ import annotations + import inspect import unittest diff --git a/tests/models/vit_mae/test_modeling_tf_vit_mae.py b/tests/models/vit_mae/test_modeling_tf_vit_mae.py index 53d68b644a..d5e16e9638 100644 --- a/tests/models/vit_mae/test_modeling_tf_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_tf_vit_mae.py @@ -15,6 +15,8 @@ """ Testing suite for the TensorFlow ViTMAE model. """ +from __future__ import annotations + import copy import inspect import json diff --git a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py index e30aeb6aaa..ef4c38e2a3 100644 --- a/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_tf_wav2vec2.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import glob import inspect diff --git a/tests/models/whisper/test_modeling_tf_whisper.py b/tests/models/whisper/test_modeling_tf_whisper.py index a52994899a..b9ad982176 100644 --- a/tests/models/whisper/test_modeling_tf_whisper.py +++ b/tests/models/whisper/test_modeling_tf_whisper.py @@ -14,6 +14,8 @@ # limitations under the License. """ Testing suite for the TensorFlow Whisper model. """ +from __future__ import annotations + import inspect import tempfile import traceback diff --git a/tests/models/xglm/test_modeling_tf_xglm.py b/tests/models/xglm/test_modeling_tf_xglm.py index 61fd805725..e2b8cc2e6c 100644 --- a/tests/models/xglm/test_modeling_tf_xglm.py +++ b/tests/models/xglm/test_modeling_tf_xglm.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import XGLMConfig, XGLMTokenizer, is_tf_available diff --git a/tests/models/xlm/test_modeling_tf_xlm.py b/tests/models/xlm/test_modeling_tf_xlm.py index 2b1fb2f963..5b576f02c9 100644 --- a/tests/models/xlm/test_modeling_tf_xlm.py +++ b/tests/models/xlm/test_modeling_tf_xlm.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py b/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py index 695a403b7b..1ecac55310 100644 --- a/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py +++ b/tests/models/xlm_roberta/test_modeling_tf_xlm_roberta.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import unittest from transformers import is_tf_available diff --git a/tests/models/xlnet/test_modeling_tf_xlnet.py b/tests/models/xlnet/test_modeling_tf_xlnet.py index bbc310aa8b..6d76462fda 100644 --- a/tests/models/xlnet/test_modeling_tf_xlnet.py +++ b/tests/models/xlnet/test_modeling_tf_xlnet.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import inspect import random import unittest diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 220560d923..02d5077e23 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import inspect import json @@ -22,10 +24,9 @@ import random import tempfile import unittest import unittest.mock as mock -from dataclasses import fields from importlib import import_module from math import isnan -from typing import List, Tuple, get_type_hints +from typing import List, Tuple from datasets import Dataset from huggingface_hub import HfFolder, Repository, delete_repo @@ -140,26 +141,6 @@ def _config_zero_init(config): return configs_no_init -def _return_type_has_loss(model): - return_type = get_type_hints(model.call) - if "return" not in return_type: - return False - return_type = return_type["return"] - if hasattr(return_type, "__args__"): # Awkward check for union because UnionType only turns up in 3.10 - for type_annotation in return_type.__args__: - if inspect.isclass(type_annotation) and issubclass(type_annotation, ModelOutput): - field_names = [field.name for field in fields(type_annotation)] - if "loss" in field_names: - return True - return False - elif isinstance(return_type, tuple): - return False - elif isinstance(return_type, ModelOutput): - class_fields = fields(return_type) - return "loss" in class_fields - return False - - @require_tf class TFModelTesterMixin: model_tester = None @@ -1464,8 +1445,6 @@ class TFModelTesterMixin: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) - if not getattr(model, "hf_compute_loss", None) and not _return_type_has_loss(model): - continue # The number of elements in the loss should be the same as the number of elements in the label prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True) @@ -1480,7 +1459,11 @@ class TFModelTesterMixin: input_name = possible_input_names.intersection(set(prepared_for_class)).pop() model_input = prepared_for_class.pop(input_name) - loss = model(model_input, **prepared_for_class)[0] + outputs = model(model_input, **prepared_for_class) + if not isinstance(outputs, ModelOutput) or not hasattr(outputs, "loss"): + continue + + loss = outputs.loss self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1]) # Test that model correctly compute the loss when we mask some positions @@ -1540,18 +1523,16 @@ class TFModelTesterMixin: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) - if not getattr(model, "hf_compute_loss", False) and not _return_type_has_loss(model): - continue # Test that model correctly compute the loss with kwargs prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True) - # Is there a better way to remove these decoder inputs? # We also remove "return_loss" as this is covered by the train_step when using fit() prepared_for_class = { key: val for key, val in prepared_for_class.items() - if key - not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "decoder_input_ids", "return_loss") + if key not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "return_loss") } + if "labels" in prepared_for_class and "decoder_input_ids" in prepared_for_class: + del prepared_for_class["decoder_input_ids"] accuracy_classes = [ "ForPreTraining", @@ -1575,8 +1556,10 @@ class TFModelTesterMixin: sample_weight = tf.convert_to_tensor([0.5] * self.model_tester.batch_size, dtype=tf.float32) else: sample_weight = None - - model(model.dummy_inputs) # Build the model so we can get some constant weights + # Build the model so we can get some constant weights and check outputs + outputs = model(prepared_for_class) + if getattr(outputs, "loss", None) is None: + continue model_weights = model.get_weights() # Run eagerly to save some expensive compilation times @@ -1648,7 +1631,6 @@ class TFModelTesterMixin: # Pass in all samples as a batch to match other `fit` calls weighted_dataset = weighted_dataset.batch(len(dataset)) dataset = dataset.batch(len(dataset)) - # Reinitialize to fix batchnorm again model.set_weights(model_weights) diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py index f144a7b8d9..ea5bc26986 100644 --- a/tests/utils/test_modeling_tf_core.py +++ b/tests/utils/test_modeling_tf_core.py @@ -14,6 +14,8 @@ # limitations under the License. +from __future__ import annotations + import copy import os import tempfile