From 8a828a747e65f3b8807d574320f810571a7bdd7e Mon Sep 17 00:00:00 2001 From: cyyever Date: Thu, 3 Apr 2025 23:38:01 +0800 Subject: [PATCH] Add Optional to types (#37163) Signed-off-by: cyy --- src/transformers/generation/flax_utils.py | 8 +-- src/transformers/generation/tf_utils.py | 20 +++--- src/transformers/generation/watermarking.py | 10 +-- .../image_processing_utils_fast.py | 2 +- src/transformers/integrations/executorch.py | 6 +- .../modeling_flash_attention_utils.py | 2 +- src/transformers/modeling_flax_outputs.py | 48 +++++++------- src/transformers/modeling_tf_outputs.py | 64 +++++++++---------- src/transformers/modeling_tf_utils.py | 2 +- .../models/albert/modeling_albert.py | 4 +- .../models/albert/modeling_tf_albert.py | 14 ++-- .../models/align/modeling_align.py | 12 ++-- .../models/altclip/modeling_altclip.py | 8 +-- src/transformers/models/aria/modeling_aria.py | 14 ++-- src/transformers/models/aria/modular_aria.py | 8 +-- .../models/autoformer/modeling_autoformer.py | 10 +-- .../models/aya_vision/modeling_aya_vision.py | 8 +-- .../models/aya_vision/modular_aya_vision.py | 6 +- .../models/bamba/modeling_bamba.py | 4 +- .../models/bamba/modular_bamba.py | 4 +- src/transformers/models/bart/modeling_bart.py | 14 ++-- .../models/beit/image_processing_beit.py | 38 +++++------ src/transformers/models/bert/modeling_bert.py | 4 +- .../models/bert/modeling_tf_bert.py | 12 ++-- .../models/bert/tokenization_bert_tf.py | 10 +-- .../models/big_bird/modeling_big_bird.py | 24 +++---- .../models/big_bird/tokenization_big_bird.py | 2 +- .../modeling_bigbird_pegasus.py | 10 +-- .../models/bit/image_processing_bit.py | 12 ++-- .../models/blenderbot/modeling_blenderbot.py | 2 +- .../modeling_blenderbot_small.py | 2 +- .../models/blip/image_processing_blip.py | 2 +- src/transformers/models/blip/modeling_blip.py | 14 ++-- .../models/blip/modeling_tf_blip.py | 20 +++--- .../models/blip_2/modeling_blip_2.py | 12 ++-- .../bridgetower/modeling_bridgetower.py | 10 +-- src/transformers/models/bros/modeling_bros.py | 4 +- .../models/canine/modeling_canine.py | 4 +- .../chameleon/image_processing_chameleon.py | 12 ++-- .../models/chameleon/modeling_chameleon.py | 8 +-- .../image_processing_chinese_clip.py | 12 ++-- .../chinese_clip/modeling_chinese_clip.py | 8 +-- src/transformers/models/clap/modeling_clap.py | 12 ++-- .../models/clip/image_processing_clip.py | 12 ++-- src/transformers/models/clip/modeling_clip.py | 12 ++-- .../models/clip/modeling_tf_clip.py | 14 ++-- .../models/clipseg/modeling_clipseg.py | 16 ++--- src/transformers/models/clvp/modeling_clvp.py | 24 +++---- .../models/codegen/tokenization_codegen.py | 2 +- .../codegen/tokenization_codegen_fast.py | 2 +- .../models/cohere/modeling_cohere.py | 4 +- .../models/cohere/modular_cohere.py | 2 +- .../models/cohere2/modeling_cohere2.py | 4 +- .../models/cohere2/modular_cohere2.py | 2 +- .../models/colpali/modeling_colpali.py | 6 +- .../image_processing_conditional_detr.py | 4 +- .../modeling_conditional_detr.py | 12 ++-- .../models/convbert/modeling_tf_convbert.py | 8 +-- .../convnext/image_processing_convnext.py | 12 ++-- .../models/convnext/modeling_convnext.py | 4 +- .../models/convnextv2/modeling_convnextv2.py | 4 +- src/transformers/models/cvt/modeling_cvt.py | 4 +- .../models/cvt/modeling_tf_cvt.py | 4 +- .../models/dab_detr/modeling_dab_detr.py | 4 +- src/transformers/models/dac/modeling_dac.py | 20 +++--- .../data2vec/modeling_tf_data2vec_vision.py | 4 +- src/transformers/models/dbrx/modeling_dbrx.py | 2 +- .../models/deberta/modeling_tf_deberta.py | 32 +++++----- .../deberta_v2/modeling_tf_deberta_v2.py | 32 +++++----- .../modeling_decision_transformer.py | 12 ++-- .../deepseek_v3/modeling_deepseek_v3.py | 4 +- .../image_processing_deformable_detr.py | 4 +- .../image_processing_deformable_detr_fast.py | 2 +- .../modeling_deformable_detr.py | 20 +++--- .../models/deit/image_processing_deit.py | 10 +-- src/transformers/models/deit/modeling_deit.py | 6 +- .../models/deit/modeling_tf_deit.py | 6 +- .../deprecated/deta/image_processing_deta.py | 4 +- .../models/deprecated/deta/modeling_deta.py | 20 +++--- .../image_processing_efficientformer.py | 2 +- .../modeling_efficientformer.py | 6 +- .../modeling_tf_efficientformer.py | 6 +- .../graphormer/configuration_graphormer.py | 2 +- .../models/deprecated/nat/modeling_nat.py | 6 +- .../models/deprecated/nezha/modeling_nezha.py | 4 +- .../open_llama/modeling_open_llama.py | 6 +- .../models/deprecated/realm/modeling_realm.py | 20 +++--- .../modeling_trajectory_transformer.py | 2 +- .../transfo_xl/modeling_tf_transfo_xl.py | 6 +- .../transfo_xl/modeling_transfo_xl.py | 4 +- .../deprecated/tvlt/image_processing_tvlt.py | 20 +++--- .../models/deprecated/tvlt/modeling_tvlt.py | 22 +++---- .../vit_hybrid/image_processing_vit_hybrid.py | 12 ++-- .../xlm_prophetnet/modeling_xlm_prophetnet.py | 4 +- .../models/depth_pro/modeling_depth_pro.py | 4 +- .../models/detr/image_processing_detr.py | 4 +- .../models/detr/image_processing_detr_fast.py | 2 +- src/transformers/models/detr/modeling_detr.py | 12 ++-- .../models/diffllama/modeling_diffllama.py | 4 +- .../models/dinat/modeling_dinat.py | 6 +- .../models/donut/image_processing_donut.py | 14 ++-- .../models/donut/modeling_donut_swin.py | 4 +- src/transformers/models/dpr/modeling_dpr.py | 4 +- .../models/dpr/modeling_tf_dpr.py | 26 ++++---- .../models/dpt/image_processing_dpt.py | 44 ++++++------- src/transformers/models/dpt/modeling_dpt.py | 6 +- .../image_processing_efficientnet.py | 14 ++-- .../efficientnet/modeling_efficientnet.py | 4 +- .../models/electra/modeling_electra.py | 2 +- .../models/electra/modeling_tf_electra.py | 10 +-- .../models/emu3/image_processing_emu3.py | 20 +++--- src/transformers/models/emu3/modeling_emu3.py | 12 ++-- src/transformers/models/emu3/modular_emu3.py | 8 +-- .../encodec/feature_extraction_encodec.py | 4 +- .../models/encodec/modeling_encodec.py | 12 ++-- .../models/ernie/modeling_ernie.py | 4 +- .../models/esm/modeling_esmfold.py | 46 ++++++------- .../modeling_fastspeech2_conformer.py | 10 +-- .../models/flaubert/modeling_tf_flaubert.py | 2 +- .../models/flava/image_processing_flava.py | 16 ++--- .../models/flava/modeling_flava.py | 2 +- src/transformers/models/fnet/modeling_fnet.py | 4 +- .../models/fnet/tokenization_fnet.py | 2 +- .../models/focalnet/modeling_focalnet.py | 8 +-- src/transformers/models/fsmt/modeling_fsmt.py | 2 +- .../models/funnel/modeling_funnel.py | 2 +- .../models/funnel/modeling_tf_funnel.py | 2 +- src/transformers/models/fuyu/modeling_fuyu.py | 8 ++- .../models/gemma/modeling_gemma.py | 4 +- .../models/gemma/modular_gemma.py | 2 +- .../models/gemma2/modeling_gemma2.py | 4 +- .../models/gemma2/modular_gemma2.py | 4 +- .../models/gemma3/image_processing_gemma3.py | 20 +++--- .../models/gemma3/modeling_gemma3.py | 8 +-- .../models/gemma3/modular_gemma3.py | 6 +- src/transformers/models/git/modeling_git.py | 2 +- src/transformers/models/glm/modeling_glm.py | 4 +- .../got_ocr2/image_processing_got_ocr2.py | 2 +- .../models/got_ocr2/modeling_got_ocr2.py | 8 +-- .../models/got_ocr2/modular_got_ocr2.py | 4 +- src/transformers/models/gpt2/modeling_gpt2.py | 4 +- .../models/gpt2/modeling_tf_gpt2.py | 4 +- .../models/gpt2/tokenization_gpt2_tf.py | 12 +++- .../models/granite/modeling_granite.py | 4 +- .../models/granite/modular_granite.py | 4 +- .../models/granitemoe/modeling_granitemoe.py | 4 +- .../modeling_granitemoeshared.py | 4 +- .../image_processing_grounding_dino.py | 4 +- .../grounding_dino/modeling_grounding_dino.py | 28 ++++---- .../models/groupvit/modeling_groupvit.py | 10 +-- .../models/groupvit/modeling_tf_groupvit.py | 24 +++---- .../models/helium/modeling_helium.py | 4 +- .../models/hiera/modeling_hiera.py | 12 ++-- .../models/idefics/modeling_idefics.py | 8 +-- .../models/idefics/modeling_tf_idefics.py | 4 +- src/transformers/models/idefics/vision.py | 2 +- src/transformers/models/idefics/vision_tf.py | 2 +- .../models/idefics2/modeling_idefics2.py | 8 +-- .../models/idefics3/modeling_idefics3.py | 8 +-- .../imagegpt/image_processing_imagegpt.py | 4 +- .../models/informer/modeling_informer.py | 2 +- .../image_processing_instructblipvideo.py | 4 +- .../models/jamba/modeling_jamba.py | 4 +- .../models/jetmoe/modeling_jetmoe.py | 4 +- .../models/kosmos2/modeling_kosmos2.py | 18 +++--- .../models/layoutlm/modeling_tf_layoutlm.py | 10 +-- .../layoutlmv2/image_processing_layoutlmv2.py | 4 +- .../layoutlmv2/tokenization_layoutlmv2.py | 6 +- .../tokenization_layoutlmv2_fast.py | 4 +- .../layoutlmv3/image_processing_layoutlmv3.py | 10 +-- .../layoutlmv3/modeling_tf_layoutlmv3.py | 2 +- .../layoutlmv3/tokenization_layoutlmv3.py | 6 +- .../tokenization_layoutlmv3_fast.py | 4 +- .../layoutxlm/tokenization_layoutxlm.py | 4 +- .../layoutxlm/tokenization_layoutxlm_fast.py | 2 +- src/transformers/models/led/modeling_led.py | 10 +-- .../models/led/modeling_tf_led.py | 6 +- .../models/levit/modeling_levit.py | 12 ++-- .../models/llama/modeling_llama.py | 4 +- .../models/llava/image_processing_llava.py | 2 +- .../models/llava/modeling_llava.py | 8 +-- .../llava_next/image_processing_llava_next.py | 22 +++---- .../models/llava_next/modeling_llava_next.py | 6 +- .../image_processing_llava_next_video.py | 24 +++---- .../modeling_llava_next_video.py | 8 +-- .../modular_llava_next_video.py | 6 +- .../image_processing_llava_onevision.py | 20 +++--- .../modeling_llava_onevision.py | 8 +-- .../video_processing_llava_onevision.py | 20 +++--- .../models/longformer/modeling_longformer.py | 14 ++-- .../longformer/modeling_tf_longformer.py | 18 +++--- src/transformers/models/luke/modeling_luke.py | 29 +++++---- .../models/m2m_100/modeling_m2m_100.py | 5 +- .../models/marian/modeling_marian.py | 10 +-- .../models/markuplm/tokenization_markuplm.py | 6 +- .../markuplm/tokenization_markuplm_fast.py | 4 +- .../mask2former/configuration_mask2former.py | 2 +- .../image_processing_mask2former.py | 18 +++--- .../mask2former/modeling_mask2former.py | 36 +++++------ .../maskformer/image_processing_maskformer.py | 18 +++--- .../models/maskformer/modeling_maskformer.py | 8 +-- .../maskformer/modeling_maskformer_swin.py | 6 +- .../models/mbart/modeling_mbart.py | 14 ++-- .../megatron_bert/modeling_megatron_bert.py | 4 +- src/transformers/models/mimi/modeling_mimi.py | 12 ++-- .../models/mistral/modeling_mistral.py | 4 +- .../models/mistral/modeling_tf_mistral.py | 8 +-- .../models/mistral3/modeling_mistral3.py | 8 +-- .../models/mistral3/modular_mistral3.py | 6 +- .../models/mixtral/modeling_mixtral.py | 4 +- .../models/mixtral/modular_mixtral.py | 4 +- .../models/mllama/modeling_mllama.py | 12 ++-- .../models/mobilebert/modeling_mobilebert.py | 4 +- .../mobilebert/modeling_tf_mobilebert.py | 4 +- .../image_processing_mobilenet_v1.py | 2 +- .../image_processing_mobilenet_v2.py | 2 +- .../mobilevit/image_processing_mobilevit.py | 24 +++---- .../models/modernbert/modeling_modernbert.py | 2 +- .../models/modernbert/modular_modernbert.py | 2 +- .../models/moonshine/modeling_moonshine.py | 2 +- .../models/moonshine/modular_moonshine.py | 2 +- .../models/moshi/modeling_moshi.py | 26 ++++---- src/transformers/models/mt5/modeling_mt5.py | 2 +- .../models/musicgen/modeling_musicgen.py | 10 +-- .../modeling_musicgen_melody.py | 8 +-- src/transformers/models/mvp/modeling_mvp.py | 14 ++-- .../models/nemotron/modeling_nemotron.py | 4 +- .../models/nllb_moe/modeling_nllb_moe.py | 5 +- .../models/nougat/image_processing_nougat.py | 14 ++-- .../models/nougat/processing_nougat.py | 14 ++-- src/transformers/models/olmo/modeling_olmo.py | 4 +- .../models/olmo2/modeling_olmo2.py | 4 +- .../models/olmoe/modeling_olmoe.py | 4 +- .../omdet_turbo/modeling_omdet_turbo.py | 26 ++++---- .../oneformer/image_processing_oneformer.py | 18 +++--- .../models/oneformer/modeling_oneformer.py | 36 +++++------ .../models/openai/modeling_openai.py | 4 +- .../models/openai/modeling_tf_openai.py | 4 +- src/transformers/models/opt/modeling_opt.py | 6 +- .../models/owlv2/image_processing_owlv2.py | 10 +-- .../models/owlv2/modeling_owlv2.py | 32 +++++----- .../models/owlvit/modeling_owlvit.py | 30 ++++----- .../models/paligemma/modeling_paligemma.py | 10 +-- .../patchtsmixer/modeling_patchtsmixer.py | 36 +++++------ .../models/patchtst/modeling_patchtst.py | 30 ++++----- .../models/pegasus/modeling_pegasus.py | 2 +- .../models/perceiver/modeling_perceiver.py | 12 ++-- .../models/persimmon/modeling_persimmon.py | 4 +- src/transformers/models/phi/modeling_phi.py | 4 +- src/transformers/models/phi/modular_phi.py | 2 +- src/transformers/models/phi3/modeling_phi3.py | 4 +- .../modeling_phi4_multimodal.py | 4 +- .../modular_phi4_multimodal.py | 4 +- .../models/phimoe/modeling_phimoe.py | 4 +- .../pix2struct/image_processing_pix2struct.py | 2 +- .../pixtral/image_processing_pixtral.py | 10 +-- .../models/plbart/modeling_plbart.py | 8 +-- .../poolformer/image_processing_poolformer.py | 10 +-- .../models/pop2piano/modeling_pop2piano.py | 2 +- .../pop2piano/tokenization_pop2piano.py | 4 +- .../models/prophetnet/modeling_prophetnet.py | 4 +- .../models/qwen2/modeling_qwen2.py | 4 +- .../models/qwen2_5_vl/modeling_qwen2_5_vl.py | 6 +- .../models/qwen2_5_vl/modular_qwen2_5_vl.py | 2 +- .../qwen2_audio/modeling_qwen2_audio.py | 6 +- .../models/qwen2_moe/modeling_qwen2_moe.py | 4 +- .../qwen2_vl/image_processing_qwen2_vl.py | 20 +++--- .../image_processing_qwen2_vl_fast.py | 10 +-- .../models/qwen2_vl/modeling_qwen2_vl.py | 6 +- .../models/qwen3/modeling_qwen3.py | 4 +- .../models/qwen3_moe/modeling_qwen3_moe.py | 4 +- .../models/qwen3_moe/modular_qwen3_moe.py | 2 +- src/transformers/models/rag/modeling_rag.py | 8 +-- .../models/rag/modeling_tf_rag.py | 4 +- .../modeling_recurrent_gemma.py | 6 +- .../models/reformer/modeling_reformer.py | 2 +- .../models/rembert/modeling_rembert.py | 14 ++-- .../models/rembert/modeling_tf_rembert.py | 8 +-- .../models/resnet/modeling_tf_resnet.py | 8 +-- .../models/roformer/modeling_tf_roformer.py | 6 +- .../rt_detr/image_processing_rt_detr.py | 4 +- .../rt_detr/image_processing_rt_detr_fast.py | 2 +- .../models/rt_detr/modeling_rt_detr.py | 32 +++++----- .../models/rt_detr/modular_rt_detr.py | 2 +- .../models/rt_detr_v2/modeling_rt_detr_v2.py | 32 +++++----- src/transformers/models/rwkv/modeling_rwkv.py | 4 +- .../models/sam/image_processing_sam.py | 2 +- src/transformers/models/sam/modeling_sam.py | 18 ++++-- .../models/sam/modeling_tf_sam.py | 6 +- .../seamless_m4t/modeling_seamless_m4t.py | 19 +++--- .../modeling_seamless_m4t_v2.py | 39 +++++------ .../segformer/image_processing_segformer.py | 12 ++-- .../models/segformer/modeling_segformer.py | 2 +- .../shieldgemma2/modeling_shieldgemma2.py | 6 +- .../models/siglip/image_processing_siglip.py | 12 ++-- .../models/siglip/modeling_siglip.py | 12 ++-- .../models/siglip2/modeling_siglip2.py | 12 ++-- .../models/smolvlm/modeling_smolvlm.py | 8 +-- .../models/smolvlm/modular_smolvlm.py | 2 +- .../models/splinter/modeling_splinter.py | 4 +- .../models/stablelm/modeling_stablelm.py | 4 +- .../models/starcoder2/modeling_starcoder2.py | 4 +- .../models/starcoder2/modular_starcoder2.py | 2 +- .../superglue/image_processing_superglue.py | 8 +-- .../superpoint/image_processing_superpoint.py | 8 +-- src/transformers/models/swin/modeling_swin.py | 8 +-- .../models/swin/modeling_tf_swin.py | 12 ++-- .../models/swin2sr/modeling_swin2sr.py | 2 +- .../models/swinv2/modeling_swinv2.py | 8 +-- src/transformers/models/t5/modeling_t5.py | 2 +- .../modeling_table_transformer.py | 6 +- .../models/tapas/modeling_tapas.py | 4 +- .../models/tapas/modeling_tf_tapas.py | 10 +-- .../textnet/image_processing_textnet.py | 12 ++-- .../modeling_time_series_transformer.py | 2 +- .../models/tvp/image_processing_tvp.py | 26 ++++---- src/transformers/models/tvp/modeling_tvp.py | 2 +- src/transformers/models/udop/modeling_udop.py | 16 ++--- .../models/udop/tokenization_udop.py | 6 +- .../models/udop/tokenization_udop_fast.py | 4 +- src/transformers/models/umt5/modeling_umt5.py | 2 +- .../models/unispeech/modeling_unispeech.py | 6 +- .../unispeech_sat/modeling_unispeech_sat.py | 8 +-- .../models/univnet/modeling_univnet.py | 4 +- .../image_processing_video_llava.py | 16 ++--- .../video_llava/modeling_video_llava.py | 8 +-- .../videomae/image_processing_videomae.py | 20 +++--- .../models/videomae/modeling_videomae.py | 4 +- src/transformers/models/vilt/modeling_vilt.py | 2 +- .../models/vipllava/modeling_vipllava.py | 6 +- .../visual_bert/modeling_visual_bert.py | 4 +- .../models/vit_mae/modeling_tf_vit_mae.py | 22 +++---- .../models/vit_mae/modeling_vit_mae.py | 14 ++-- .../models/vitmatte/modeling_vitmatte.py | 2 +- .../vitpose/image_processing_vitpose.py | 10 +-- .../models/vitpose/modeling_vitpose.py | 2 +- src/transformers/models/vits/modeling_vits.py | 10 +-- .../models/vivit/image_processing_vivit.py | 24 +++---- .../models/wav2vec2/modeling_tf_wav2vec2.py | 4 +- .../models/wav2vec2/modeling_wav2vec2.py | 6 +- .../models/wav2vec2/tokenization_wav2vec2.py | 8 +-- .../modeling_wav2vec2_conformer.py | 6 +- .../tokenization_wav2vec2_phoneme.py | 6 +- .../models/whisper/modeling_whisper.py | 2 +- .../models/whisper/tokenization_whisper.py | 4 +- .../whisper/tokenization_whisper_fast.py | 4 +- .../models/x_clip/modeling_x_clip.py | 8 +-- src/transformers/models/xglm/modeling_xglm.py | 2 +- .../models/xlm/modeling_tf_xlm.py | 2 +- .../models/xlnet/modeling_tf_xlnet.py | 14 ++-- .../models/xlnet/modeling_xlnet.py | 12 ++-- .../models/xlnet/tokenization_xlnet.py | 2 +- .../models/yolos/image_processing_yolos.py | 4 +- .../models/yolos/modeling_yolos.py | 4 +- .../models/zamba/modeling_zamba.py | 4 +- .../models/zamba2/modeling_zamba2.py | 4 +- .../models/zamba2/modular_zamba2.py | 2 +- .../zoedepth/image_processing_zoedepth.py | 12 ++-- .../models/zoedepth/modeling_zoedepth.py | 4 +- src/transformers/optimization.py | 4 +- src/transformers/tf_utils.py | 2 +- src/transformers/tokenization_utils.py | 2 +- src/transformers/tokenization_utils_base.py | 4 +- src/transformers/tokenization_utils_fast.py | 2 +- src/transformers/utils/import_utils.py | 2 +- 365 files changed, 1647 insertions(+), 1616 deletions(-) diff --git a/src/transformers/generation/flax_utils.py b/src/transformers/generation/flax_utils.py index b6a02cd59b..ddd718cbb8 100644 --- a/src/transformers/generation/flax_utils.py +++ b/src/transformers/generation/flax_utils.py @@ -63,7 +63,7 @@ class FlaxGreedySearchOutput(ModelOutput): The generated sequences. """ - sequences: jnp.ndarray = None + sequences: Optional[jnp.ndarray] = None @flax.struct.dataclass @@ -77,7 +77,7 @@ class FlaxSampleOutput(ModelOutput): The generated sequences. """ - sequences: jnp.ndarray = None + sequences: Optional[jnp.ndarray] = None @flax.struct.dataclass @@ -93,8 +93,8 @@ class FlaxBeamSearchOutput(ModelOutput): The scores (log probabilities) of the generated sequences. """ - sequences: jnp.ndarray = None - scores: jnp.ndarray = None + sequences: Optional[jnp.ndarray] = None + scores: Optional[jnp.ndarray] = None @flax.struct.dataclass diff --git a/src/transformers/generation/tf_utils.py b/src/transformers/generation/tf_utils.py index 262a85330a..344147e6e3 100644 --- a/src/transformers/generation/tf_utils.py +++ b/src/transformers/generation/tf_utils.py @@ -76,7 +76,7 @@ class TFGreedySearchDecoderOnlyOutput(ModelOutput): `tf.Tensor` of shape `(batch_size, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[Tuple[tf.Tensor]]] = None hidden_states: Optional[Tuple[Tuple[tf.Tensor]]] = None @@ -115,7 +115,7 @@ class TFGreedySearchEncoderDecoderOutput(ModelOutput): `tf.Tensor` of shape `(batch_size, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None encoder_attentions: Optional[Tuple[tf.Tensor]] = None encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None @@ -146,7 +146,7 @@ class TFSampleDecoderOnlyOutput(ModelOutput): `tf.Tensor` of shape `(num_return_sequences*batch_size, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[Tuple[tf.Tensor]]] = None hidden_states: Optional[Tuple[Tuple[tf.Tensor]]] = None @@ -185,7 +185,7 @@ class TFSampleEncoderDecoderOutput(ModelOutput): `tf.Tensor` of shape `(batch_size*num_return_sequences, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None encoder_attentions: Optional[Tuple[tf.Tensor]] = None encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None @@ -221,7 +221,7 @@ class TFBeamSearchDecoderOnlyOutput(ModelOutput): `tf.Tensor` of shape `(batch_size*num_beams*num_return_sequences, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None sequences_scores: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None beam_indices: Optional[tf.Tensor] = None @@ -268,7 +268,7 @@ class TFBeamSearchEncoderDecoderOutput(ModelOutput): `tf.Tensor` of shape `(batch_size*num_beams*num_return_sequences, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None sequences_scores: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None beam_indices: Optional[tf.Tensor] = None @@ -306,7 +306,7 @@ class TFBeamSampleDecoderOnlyOutput(ModelOutput): `tf.Tensor` of shape `(batch_size*num_beams, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None sequences_scores: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None beam_indices: Optional[tf.Tensor] = None @@ -352,7 +352,7 @@ class TFBeamSampleEncoderDecoderOutput(ModelOutput): `tf.Tensor` of shape `(batch_size*num_beams, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None sequences_scores: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None beam_indices: Optional[tf.Tensor] = None @@ -384,7 +384,7 @@ class TFContrastiveSearchDecoderOnlyOutput(ModelOutput): `tf.Tensor` of shape `(batch_size, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[Tuple[tf.Tensor]]] = None hidden_states: Optional[Tuple[Tuple[tf.Tensor]]] = None @@ -422,7 +422,7 @@ class TFContrastiveSearchEncoderDecoderOutput(ModelOutput): `tf.Tensor` of shape `(batch_size, generated_length, hidden_size)`. """ - sequences: tf.Tensor = None + sequences: Optional[tf.Tensor] = None scores: Optional[Tuple[tf.Tensor]] = None encoder_attentions: Optional[Tuple[tf.Tensor]] = None encoder_hidden_states: Optional[Tuple[tf.Tensor]] = None diff --git a/src/transformers/generation/watermarking.py b/src/transformers/generation/watermarking.py index 669828d2c9..e5f900c3b7 100644 --- a/src/transformers/generation/watermarking.py +++ b/src/transformers/generation/watermarking.py @@ -61,11 +61,11 @@ class WatermarkDetectorOutput: Array containing confidence scores of a text being machine-generated for each element in the batch. """ - num_tokens_scored: np.array = None - num_green_tokens: np.array = None - green_fraction: np.array = None - z_score: np.array = None - p_value: np.array = None + num_tokens_scored: Optional[np.array] = None + num_green_tokens: Optional[np.array] = None + green_fraction: Optional[np.array] = None + z_score: Optional[np.array] = None + p_value: Optional[np.array] = None prediction: Optional[np.array] = None confidence: Optional[np.array] = None diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index fa0f9952e0..b671a11191 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -573,7 +573,7 @@ class BaseImageProcessorFast(BaseImageProcessor): def _prepare_input_images( self, images: ImageInput, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, device: Optional["torch.device"] = None, ) -> list["torch.Tensor"]: diff --git a/src/transformers/integrations/executorch.py b/src/transformers/integrations/executorch.py index aafe6a13a3..591c556e59 100644 --- a/src/transformers/integrations/executorch.py +++ b/src/transformers/integrations/executorch.py @@ -10,6 +10,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the # specific language governing permissions and limitations under the License. +from typing import Optional + import torch from transformers.generation.configuration_utils import GenerationConfig @@ -178,8 +180,8 @@ class TorchExportableModuleWithStaticCache(torch.nn.Module): def convert_and_export_with_cache( model: PreTrainedModel, - example_input_ids: torch.Tensor = None, - example_cache_position: torch.Tensor = None, + example_input_ids: Optional[torch.Tensor] = None, + example_cache_position: Optional[torch.Tensor] = None, ): """ Convert a `PreTrainedModel` into an exportable module and export it using `torch.export`, diff --git a/src/transformers/modeling_flash_attention_utils.py b/src/transformers/modeling_flash_attention_utils.py index a505a954b3..c7d54dc415 100644 --- a/src/transformers/modeling_flash_attention_utils.py +++ b/src/transformers/modeling_flash_attention_utils.py @@ -289,7 +289,7 @@ def _flash_attention_forward( sliding_window: Optional[int] = None, use_top_left_mask: bool = False, softcap: Optional[float] = None, - deterministic: bool = None, + deterministic: Optional[bool] = None, cu_seq_lens_q: Optional[torch.LongTensor] = None, cu_seq_lens_k: Optional[torch.LongTensor] = None, max_length_q: Optional[int] = None, diff --git a/src/transformers/modeling_flax_outputs.py b/src/transformers/modeling_flax_outputs.py index 179a0b7879..325f968571 100644 --- a/src/transformers/modeling_flax_outputs.py +++ b/src/transformers/modeling_flax_outputs.py @@ -40,7 +40,7 @@ class FlaxBaseModelOutput(ModelOutput): heads. """ - last_hidden_state: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -59,7 +59,7 @@ class FlaxBaseModelOutputWithNoAttention(ModelOutput): model at the output of each layer plus the optional initial embedding outputs. """ - last_hidden_state: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None @@ -79,8 +79,8 @@ class FlaxBaseModelOutputWithPoolingAndNoAttention(ModelOutput): model at the output of each layer plus the optional initial embedding outputs. """ - last_hidden_state: jnp.ndarray = None - pooler_output: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None + pooler_output: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None @@ -99,7 +99,7 @@ class FlaxImageClassifierOutputWithNoAttention(ModelOutput): called feature maps) of the model at the output of each stage. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None @@ -127,7 +127,7 @@ class FlaxBaseModelOutputWithPast(ModelOutput): heads. """ - last_hidden_state: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None past_key_values: Optional[Dict[str, jnp.ndarray]] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -158,8 +158,8 @@ class FlaxBaseModelOutputWithPooling(ModelOutput): heads. """ - last_hidden_state: jnp.ndarray = None - pooler_output: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None + pooler_output: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -205,8 +205,8 @@ class FlaxBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput): input) to speed up sequential decoding. """ - last_hidden_state: jnp.ndarray = None - pooler_output: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None + pooler_output: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -252,7 +252,7 @@ class FlaxBaseModelOutputWithPastAndCrossAttentions(ModelOutput): weighted average in the cross-attention heads. """ - last_hidden_state: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -310,7 +310,7 @@ class FlaxSeq2SeqModelOutput(ModelOutput): self-attention heads. """ - last_hidden_state: jnp.ndarray = None + last_hidden_state: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None decoder_hidden_states: Optional[Tuple[jnp.ndarray]] = None decoder_attentions: Optional[Tuple[jnp.ndarray]] = None @@ -354,7 +354,7 @@ class FlaxCausalLMOutputWithCrossAttentions(ModelOutput): `past_key_values` input) to speed up sequential decoding. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -382,7 +382,7 @@ class FlaxMaskedLMOutput(ModelOutput): heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -437,7 +437,7 @@ class FlaxSeq2SeqLMOutput(ModelOutput): self-attention heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None decoder_hidden_states: Optional[Tuple[jnp.ndarray]] = None decoder_attentions: Optional[Tuple[jnp.ndarray]] = None @@ -469,7 +469,7 @@ class FlaxNextSentencePredictorOutput(ModelOutput): heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -495,7 +495,7 @@ class FlaxSequenceClassifierOutput(ModelOutput): heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -547,7 +547,7 @@ class FlaxSeq2SeqSequenceClassifierOutput(ModelOutput): self-attention heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None decoder_hidden_states: Optional[Tuple[jnp.ndarray]] = None decoder_attentions: Optional[Tuple[jnp.ndarray]] = None @@ -580,7 +580,7 @@ class FlaxMultipleChoiceModelOutput(ModelOutput): heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -606,7 +606,7 @@ class FlaxTokenClassifierOutput(ModelOutput): heads. """ - logits: jnp.ndarray = None + logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -634,8 +634,8 @@ class FlaxQuestionAnsweringModelOutput(ModelOutput): heads. """ - start_logits: jnp.ndarray = None - end_logits: jnp.ndarray = None + start_logits: Optional[jnp.ndarray] = None + end_logits: Optional[jnp.ndarray] = None hidden_states: Optional[Tuple[jnp.ndarray]] = None attentions: Optional[Tuple[jnp.ndarray]] = None @@ -689,8 +689,8 @@ class FlaxSeq2SeqQuestionAnsweringModelOutput(ModelOutput): self-attention heads. """ - start_logits: jnp.ndarray = None - end_logits: jnp.ndarray = None + start_logits: Optional[jnp.ndarray] = None + end_logits: Optional[jnp.ndarray] = None past_key_values: Optional[Tuple[Tuple[jnp.ndarray]]] = None decoder_hidden_states: Optional[Tuple[jnp.ndarray]] = None decoder_attentions: Optional[Tuple[jnp.ndarray]] = None diff --git a/src/transformers/modeling_tf_outputs.py b/src/transformers/modeling_tf_outputs.py index 357c34bc1f..cbc8b3682a 100644 --- a/src/transformers/modeling_tf_outputs.py +++ b/src/transformers/modeling_tf_outputs.py @@ -44,7 +44,7 @@ class TFBaseModelOutput(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -64,7 +64,7 @@ class TFBaseModelOutputWithNoAttention(ModelOutput): Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Optional[Tuple[tf.Tensor, ...]] = None @@ -96,8 +96,8 @@ class TFBaseModelOutputWithPooling(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None - pooler_output: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -119,8 +119,8 @@ class TFBaseModelOutputWithPoolingAndNoAttention(ModelOutput): Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. """ - last_hidden_state: tf.Tensor = None - pooler_output: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Optional[Tuple[tf.Tensor, ...]] = None @@ -164,8 +164,8 @@ class TFBaseModelOutputWithPoolingAndCrossAttentions(ModelOutput): weighted average in the cross-attention heads. """ - last_hidden_state: tf.Tensor = None - pooler_output: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + pooler_output: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -202,7 +202,7 @@ class TFBaseModelOutputWithPast(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -235,7 +235,7 @@ class TFBaseModelOutputWithCrossAttentions(ModelOutput): weighted average in the cross-attention heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None cross_attentions: Tuple[tf.Tensor] | None = None @@ -277,7 +277,7 @@ class TFBaseModelOutputWithPastAndCrossAttentions(ModelOutput): weighted average in the cross-attention heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -334,7 +334,7 @@ class TFSeq2SeqModelOutput(ModelOutput): self-attention heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor] | None = None decoder_attentions: Tuple[tf.Tensor] | None = None @@ -368,7 +368,7 @@ class TFCausalLMOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -403,7 +403,7 @@ class TFCausalLMOutputWithPast(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -445,7 +445,7 @@ class TFCausalLMOutputWithCrossAttentions(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -476,7 +476,7 @@ class TFMaskedLMOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -530,7 +530,7 @@ class TFSeq2SeqLMOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor] | None = None decoder_attentions: Tuple[tf.Tensor] | None = None @@ -565,7 +565,7 @@ class TFNextSentencePredictorOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -594,7 +594,7 @@ class TFSequenceClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -645,7 +645,7 @@ class TFSeq2SeqSequenceClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor] | None = None decoder_attentions: Tuple[tf.Tensor] | None = None @@ -687,7 +687,7 @@ class TFSemanticSegmenterOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -719,7 +719,7 @@ class TFSemanticSegmenterOutputWithNoAttention(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None @@ -745,7 +745,7 @@ class TFImageClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -776,7 +776,7 @@ class TFMultipleChoiceModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -805,7 +805,7 @@ class TFTokenClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -836,8 +836,8 @@ class TFQuestionAnsweringModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - start_logits: tf.Tensor = None - end_logits: tf.Tensor = None + start_logits: Optional[tf.Tensor] = None + end_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -887,8 +887,8 @@ class TFSeq2SeqQuestionAnsweringModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - start_logits: tf.Tensor = None - end_logits: tf.Tensor = None + start_logits: Optional[tf.Tensor] = None + end_logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor] | None = None decoder_attentions: Tuple[tf.Tensor] | None = None @@ -927,7 +927,7 @@ class TFSequenceClassifierOutputWithPast(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -950,7 +950,7 @@ class TFImageClassifierOutputWithNoAttention(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Optional[Tuple[tf.Tensor, ...]] = None @@ -977,7 +977,7 @@ class TFMaskedImageModelingOutput(ModelOutput): """ loss: tf.Tensor | None = None - reconstruction: tf.Tensor = None + reconstruction: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index a09bc430a4..c0add5bb66 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -2530,7 +2530,7 @@ class TFPreTrainedModel(keras.Model, TFModelUtilsMixin, TFGenerationMixin, PushT local_files_only: bool = False, token: Optional[Union[str, bool]] = None, revision: str = "main", - use_safetensors: bool = None, + use_safetensors: Optional[bool] = None, **kwargs, ): r""" diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index dca1fe7f60..574735c189 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -609,8 +609,8 @@ class AlbertForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - sop_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + sop_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 24a25658a4..6800cfa8d1 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -164,10 +164,10 @@ class TFAlbertEmbeddings(keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, past_key_values_length=0, training: bool = False, ) -> tf.Tensor: @@ -749,9 +749,9 @@ class TFAlbertForPreTrainingOutput(ModelOutput): heads. """ - loss: tf.Tensor = None - prediction_logits: tf.Tensor = None - sop_logits: tf.Tensor = None + loss: Optional[tf.Tensor] = None + prediction_logits: Optional[tf.Tensor] = None + sop_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/align/modeling_align.py b/src/transformers/models/align/modeling_align.py index f834aecf69..a007b7a7c6 100644 --- a/src/transformers/models/align/modeling_align.py +++ b/src/transformers/models/align/modeling_align.py @@ -198,7 +198,7 @@ class AlignVisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -226,7 +226,7 @@ class AlignTextModelOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -254,10 +254,10 @@ class AlignOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPoolingAndCrossAttentions = None vision_model_output: BaseModelOutputWithPoolingAndNoAttention = None diff --git a/src/transformers/models/altclip/modeling_altclip.py b/src/transformers/models/altclip/modeling_altclip.py index 90d8aa631e..6e4c9e650d 100755 --- a/src/transformers/models/altclip/modeling_altclip.py +++ b/src/transformers/models/altclip/modeling_altclip.py @@ -182,10 +182,10 @@ class AltCLIPOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py index 35c1730f1e..92f0685c3b 100644 --- a/src/transformers/models/aria/modeling_aria.py +++ b/src/transformers/models/aria/modeling_aria.py @@ -902,7 +902,7 @@ class AriaTextModel(AriaTextPreTrainedModel): @add_start_docstrings_to_model_forward(ARIA_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1189,7 +1189,7 @@ class AriaTextForCausalLM(AriaTextPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1303,7 +1303,7 @@ class AriaCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1424,7 +1424,7 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin): def get_image_features( self, pixel_values: torch.FloatTensor, - pixel_mask: torch.FloatTensor = None, + pixel_mask: Optional[torch.FloatTensor] = None, vision_feature_layer: int = -1, ): patch_attention_mask = self._create_patch_attention_mask(pixel_mask) @@ -1446,9 +1446,9 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=AriaCausalLMOutputWithPast, config_class=AriaConfig) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index afc61a02dd..3f38c87b5d 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -1402,7 +1402,7 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin): def get_image_features( self, pixel_values: torch.FloatTensor, - pixel_mask: torch.FloatTensor = None, + pixel_mask: Optional[torch.FloatTensor] = None, vision_feature_layer: int = -1, ): patch_attention_mask = self._create_patch_attention_mask(pixel_mask) @@ -1424,9 +1424,9 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=AriaCausalLMOutputWithPast, config_class=AriaConfig) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_mask: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_mask: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/autoformer/modeling_autoformer.py b/src/transformers/models/autoformer/modeling_autoformer.py index aaca155b0c..5a96b6235d 100644 --- a/src/transformers/models/autoformer/modeling_autoformer.py +++ b/src/transformers/models/autoformer/modeling_autoformer.py @@ -85,8 +85,8 @@ class AutoFormerDecoderOutput(ModelOutput): weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - trend: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + trend: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -153,8 +153,8 @@ class AutoformerModelOutput(ModelOutput): Static features of each time series' in a batch which are copied to the covariates at inference time. """ - last_hidden_state: torch.FloatTensor = None - trend: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + trend: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -305,7 +305,7 @@ class AutoformerNOPScaler(nn.Module): self.keepdim = config.keepdim if hasattr(config, "keepdim") else True def forward( - self, data: torch.Tensor, observed_indicator: torch.Tensor = None + self, data: torch.Tensor, observed_indicator: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Parameters: diff --git a/src/transformers/models/aya_vision/modeling_aya_vision.py b/src/transformers/models/aya_vision/modeling_aya_vision.py index 021898931a..1e6e76a210 100644 --- a/src/transformers/models/aya_vision/modeling_aya_vision.py +++ b/src/transformers/models/aya_vision/modeling_aya_vision.py @@ -182,7 +182,7 @@ class AyaVisionCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -355,8 +355,8 @@ class AyaVisionForConditionalGeneration(AyaVisionPreTrainedModel, GenerationMixi @replace_return_docstrings(output_type=AyaVisionCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -370,7 +370,7 @@ class AyaVisionForConditionalGeneration(AyaVisionPreTrainedModel, GenerationMixi return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **lm_kwargs, ) -> Union[Tuple, AyaVisionCausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/aya_vision/modular_aya_vision.py b/src/transformers/models/aya_vision/modular_aya_vision.py index b39c6e0803..b046275a2d 100644 --- a/src/transformers/models/aya_vision/modular_aya_vision.py +++ b/src/transformers/models/aya_vision/modular_aya_vision.py @@ -212,8 +212,8 @@ class AyaVisionForConditionalGeneration(LlavaForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -227,7 +227,7 @@ class AyaVisionForConditionalGeneration(LlavaForConditionalGeneration): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **lm_kwargs, ) -> Union[Tuple, AyaVisionCausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/bamba/modeling_bamba.py b/src/transformers/models/bamba/modeling_bamba.py index 5af06951c6..fd16fc6334 100644 --- a/src/transformers/models/bamba/modeling_bamba.py +++ b/src/transformers/models/bamba/modeling_bamba.py @@ -1198,7 +1198,7 @@ class BambaModel(BambaPreTrainedModel): @add_start_docstrings_to_model_forward(BAMBA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, @@ -1476,7 +1476,7 @@ class BambaForCausalLM(BambaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, diff --git a/src/transformers/models/bamba/modular_bamba.py b/src/transformers/models/bamba/modular_bamba.py index df4f020057..b6cdf90774 100644 --- a/src/transformers/models/bamba/modular_bamba.py +++ b/src/transformers/models/bamba/modular_bamba.py @@ -940,7 +940,7 @@ class BambaModel(BambaPreTrainedModel): @add_start_docstrings_to_model_forward(BAMBA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, @@ -1187,7 +1187,7 @@ class BambaForCausalLM(LlamaForCausalLM): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py index a519528491..bc3d4dcd93 100755 --- a/src/transformers/models/bart/modeling_bart.py +++ b/src/transformers/models/bart/modeling_bart.py @@ -993,7 +993,7 @@ class BartEncoder(BartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -1178,7 +1178,7 @@ class BartDecoder(BartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1474,7 +1474,7 @@ class BartModel(BartPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1615,7 +1615,7 @@ class BartForConditionalGeneration(BartPreTrainedModel, GenerationMixin): @add_end_docstrings(BART_GENERATION_EXAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1742,7 +1742,7 @@ class BartForSequenceClassification(BartPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1871,7 +1871,7 @@ class BartForQuestionAnswering(BartPreTrainedModel): ) def forward( self, - input_ids: torch.Tensor = None, + input_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2020,7 +2020,7 @@ class BartForCausalLM(BartPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index ce75797f5c..0d928f2141 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -190,15 +190,15 @@ class BeitImageProcessor(BaseImageProcessor): def _preprocess( self, image: ImageInput, - do_reduce_labels: bool = None, - do_resize: bool = None, + do_reduce_labels: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -223,14 +223,14 @@ class BeitImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -268,12 +268,12 @@ class BeitImageProcessor(BaseImageProcessor): def _preprocess_segmentation_map( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_reduce_labels: bool = None, + do_reduce_labels: Optional[bool] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): """Preprocesses a single segmentation map.""" @@ -317,14 +317,14 @@ class BeitImageProcessor(BaseImageProcessor): self, images: ImageInput, segmentation_maps: Optional[ImageInput] = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_reduce_labels: Optional[bool] = None, diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index d7a26500cc..38d2c8c8b5 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -880,8 +880,8 @@ class BertForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index ce862194dc..ba73faf23c 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -161,10 +161,10 @@ class TFBertEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, past_key_values_length=0, training: bool = False, ) -> tf.Tensor: @@ -1048,8 +1048,8 @@ class TFBertForPreTrainingOutput(ModelOutput): """ loss: tf.Tensor | None = None - prediction_logits: tf.Tensor = None - seq_relationship_logits: tf.Tensor = None + prediction_logits: Optional[tf.Tensor] = None + seq_relationship_logits: Optional[tf.Tensor] = None hidden_states: Optional[Union[Tuple[tf.Tensor], tf.Tensor]] = None attentions: Optional[Union[Tuple[tf.Tensor], tf.Tensor]] = None diff --git a/src/transformers/models/bert/tokenization_bert_tf.py b/src/transformers/models/bert/tokenization_bert_tf.py index b1f49722fb..86658de524 100644 --- a/src/transformers/models/bert/tokenization_bert_tf.py +++ b/src/transformers/models/bert/tokenization_bert_tf.py @@ -1,5 +1,5 @@ import os -from typing import List, Union +from typing import List, Optional, Union import tensorflow as tf from tensorflow_text import BertTokenizer as BertTokenizerLayer @@ -58,13 +58,13 @@ class TFBertTokenizer(keras.layers.Layer): self, vocab_list: List, do_lower_case: bool, - cls_token_id: int = None, - sep_token_id: int = None, - pad_token_id: int = None, + cls_token_id: Optional[int] = None, + sep_token_id: Optional[int] = None, + pad_token_id: Optional[int] = None, padding: str = "longest", truncation: bool = True, max_length: int = 512, - pad_to_multiple_of: int = None, + pad_to_multiple_of: Optional[int] = None, return_token_type_ids: bool = True, return_attention_mask: bool = True, use_fast_bert_tokenizer: bool = True, diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index 4ddce6e9fe..c2f6646202 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -1860,8 +1860,8 @@ class BigBirdForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1894,9 +1894,9 @@ class BigBirdForQuestionAnsweringModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None - pooler_output: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1970,7 +1970,7 @@ class BigBirdModel(BigBirdPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2268,7 +2268,7 @@ class BigBirdForPreTraining(BigBirdPreTrainedModel): @replace_return_docstrings(output_type=BigBirdForPreTrainingOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2381,7 +2381,7 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel): @replace_return_docstrings(output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2527,7 +2527,7 @@ class BigBirdForCausalLM(BigBirdPreTrainedModel, GenerationMixin): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2666,7 +2666,7 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel): @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2800,7 +2800,7 @@ class BigBirdForMultipleChoice(BigBirdPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -2895,7 +2895,7 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/big_bird/tokenization_big_bird.py b/src/transformers/models/big_bird/tokenization_big_bird.py index 194cbc68cb..3e2d13e47a 100644 --- a/src/transformers/models/big_bird/tokenization_big_bird.py +++ b/src/transformers/models/big_bird/tokenization_big_bird.py @@ -184,7 +184,7 @@ class BigBirdTokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, spaces_between_special_tokens: bool = True, **kwargs, ) -> str: diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py index 935e5ee9fd..99333b7c15 100755 --- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py @@ -2346,7 +2346,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartModel.forward with Bart->BigBirdPegasus def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2489,7 +2489,7 @@ class BigBirdPegasusForConditionalGeneration(BigBirdPegasusPreTrainedModel, Gene @add_end_docstrings(BIGBIRD_PEGASUS_GENERATION_EXAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2615,7 +2615,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2743,7 +2743,7 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward def forward( self, - input_ids: torch.Tensor = None, + input_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2887,7 +2887,7 @@ class BigBirdPegasusForCausalLM(BigBirdPegasusPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index abc0954c6a..2b1f307a29 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -176,17 +176,17 @@ class BitImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py index e40208a65b..9ce51bebe7 100755 --- a/src/transformers/models/blenderbot/modeling_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_blenderbot.py @@ -1397,7 +1397,7 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py index dec50328b7..7bb4a49bb8 100755 --- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py @@ -1351,7 +1351,7 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel, GenerationMixin @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index df2aee157d..9f28b33a66 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -169,7 +169,7 @@ class BlipImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> PIL.Image.Image: diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py index dd9b57973c..1f248ab8be 100644 --- a/src/transformers/models/blip/modeling_blip.py +++ b/src/transformers/models/blip/modeling_blip.py @@ -87,7 +87,7 @@ class BlipForConditionalGenerationModelOutput(ModelOutput): loss: Optional[Tuple[torch.FloatTensor]] = None logits: Optional[Tuple[torch.FloatTensor]] = None image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -129,7 +129,7 @@ class BlipTextVisionModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -170,7 +170,7 @@ class BlipImageTextMatchingModelOutput(ModelOutput): itm_score: Optional[torch.FloatTensor] = None loss: Optional[torch.FloatTensor] = None image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None vision_pooler_output: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -200,10 +200,10 @@ class BlipOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index 92f61bf470..9573ca0fbb 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -96,7 +96,7 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput): loss: Tuple[tf.Tensor] | None = None logits: Tuple[tf.Tensor] | None = None image_embeds: tf.Tensor | None = None - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -138,7 +138,7 @@ class TFBlipTextVisionModelOutput(ModelOutput): loss: tf.Tensor | None = None image_embeds: tf.Tensor | None = None - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -179,7 +179,7 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput): itm_score: tf.Tensor | None = None loss: tf.Tensor | None = None image_embeds: tf.Tensor | None = None - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None vision_pooler_output: tf.Tensor | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -209,10 +209,10 @@ class TFBlipOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits_per_image: tf.Tensor = None - logits_per_text: tf.Tensor = None - text_embeds: tf.Tensor = None - image_embeds: tf.Tensor = None + logits_per_image: Optional[tf.Tensor] = None + logits_per_text: Optional[tf.Tensor] = None + text_embeds: Optional[tf.Tensor] = None + image_embeds: Optional[tf.Tensor] = None text_model_output: TFBaseModelOutputWithPooling = None vision_model_output: TFBaseModelOutputWithPooling = None @@ -309,9 +309,9 @@ class TFBlipTextEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, ) -> tf.Tensor: """ Applies embedding based on inputs tensor. diff --git a/src/transformers/models/blip_2/modeling_blip_2.py b/src/transformers/models/blip_2/modeling_blip_2.py index a02868359e..5e5b6f9f4b 100644 --- a/src/transformers/models/blip_2/modeling_blip_2.py +++ b/src/transformers/models/blip_2/modeling_blip_2.py @@ -106,10 +106,10 @@ class Blip2ImageTextMatchingModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -145,7 +145,7 @@ class Blip2TextModelOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -175,7 +175,7 @@ class Blip2VisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 2ab7d9dfc1..524b4caa74 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -149,9 +149,9 @@ class BridgeTowerModelOutput(ModelOutput): heads. """ - text_features: torch.FloatTensor = None - image_features: torch.FloatTensor = None - pooler_output: torch.FloatTensor = None + text_features: Optional[torch.FloatTensor] = None + image_features: Optional[torch.FloatTensor] = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -182,7 +182,7 @@ class BridgeTowerContrastiveOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None text_embeds: Optional[Tuple[torch.FloatTensor]] = None image_embeds: Optional[Tuple[torch.FloatTensor]] = None cross_embeds: Optional[Tuple[torch.FloatTensor]] = None @@ -225,7 +225,7 @@ class BridgeTowerResidualAttention(nn.Module): key_padding_mask=attention_mask, )[0] - def forward(self, hidden_state: torch.Tensor, attention_mask: torch.Tensor = None): + def forward(self, hidden_state: torch.Tensor, attention_mask: Optional[torch.Tensor] = None): residual_state = hidden_state + self.attention(self.ln_1(hidden_state), attention_mask) hidden_state = self.ln_2(residual_state) for _, layer in self.mlp.items(): diff --git a/src/transformers/models/bros/modeling_bros.py b/src/transformers/models/bros/modeling_bros.py index ee278631f2..df39d2a49b 100755 --- a/src/transformers/models/bros/modeling_bros.py +++ b/src/transformers/models/bros/modeling_bros.py @@ -152,8 +152,8 @@ class BrosSpadeOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - initial_token_logits: torch.FloatTensor = None - subsequent_token_logits: torch.FloatTensor = None + initial_token_logits: Optional[torch.FloatTensor] = None + subsequent_token_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py index 5d781cb116..7a699f6ca9 100644 --- a/src/transformers/models/canine/modeling_canine.py +++ b/src/transformers/models/canine/modeling_canine.py @@ -85,8 +85,8 @@ class CanineModelOutputWithPooling(ModelOutput): attention softmax, used to compute the weighted average in the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None - pooler_output: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/chameleon/image_processing_chameleon.py b/src/transformers/models/chameleon/image_processing_chameleon.py index e4b8f87b55..2d1417a8ee 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon.py +++ b/src/transformers/models/chameleon/image_processing_chameleon.py @@ -172,17 +172,17 @@ class ChameleonImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/chameleon/modeling_chameleon.py b/src/transformers/models/chameleon/modeling_chameleon.py index 7ccc660aac..65ace7cbcc 100644 --- a/src/transformers/models/chameleon/modeling_chameleon.py +++ b/src/transformers/models/chameleon/modeling_chameleon.py @@ -1254,8 +1254,8 @@ class ChameleonModel(ChameleonPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1550,8 +1550,8 @@ class ChameleonForConditionalGeneration(ChameleonPreTrainedModel, GenerationMixi @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 476feaef0d..629de907b1 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -165,17 +165,17 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/chinese_clip/modeling_chinese_clip.py b/src/transformers/models/chinese_clip/modeling_chinese_clip.py index 018e9044bc..647e8f1c24 100644 --- a/src/transformers/models/chinese_clip/modeling_chinese_clip.py +++ b/src/transformers/models/chinese_clip/modeling_chinese_clip.py @@ -86,10 +86,10 @@ class ChineseCLIPOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPoolingAndCrossAttentions = None vision_model_output: BaseModelOutputWithPoolingAndCrossAttentions = None diff --git a/src/transformers/models/clap/modeling_clap.py b/src/transformers/models/clap/modeling_clap.py index 5792257e02..b2fdf0dd7e 100644 --- a/src/transformers/models/clap/modeling_clap.py +++ b/src/transformers/models/clap/modeling_clap.py @@ -154,7 +154,7 @@ class ClapTextModelOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -183,7 +183,7 @@ class ClapAudioModelOutput(ModelOutput): """ audio_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -212,10 +212,10 @@ class ClapOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_audio: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - audio_embeds: torch.FloatTensor = None + logits_per_audio: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + audio_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None audio_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index 4a42c8f9ac..8c02cd14eb 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -200,17 +200,17 @@ class CLIPImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py index b06b6fdcf5..2df825a8ad 100644 --- a/src/transformers/models/clip/modeling_clip.py +++ b/src/transformers/models/clip/modeling_clip.py @@ -103,7 +103,7 @@ class CLIPVisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -132,7 +132,7 @@ class CLIPTextModelOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -160,10 +160,10 @@ class CLIPOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index aedea502e8..6afdadd252 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -108,10 +108,10 @@ class TFCLIPOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits_per_image: tf.Tensor = None - logits_per_text: tf.Tensor = None - text_embeds: tf.Tensor = None - image_embeds: tf.Tensor = None + logits_per_image: Optional[tf.Tensor] = None + logits_per_text: Optional[tf.Tensor] = None + text_embeds: Optional[tf.Tensor] = None + image_embeds: Optional[tf.Tensor] = None text_model_output: TFBaseModelOutputWithPooling = None vision_model_output: TFBaseModelOutputWithPooling = None @@ -225,9 +225,9 @@ class TFCLIPTextEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, ) -> tf.Tensor: """ Applies embedding based on inputs tensor. diff --git a/src/transformers/models/clipseg/modeling_clipseg.py b/src/transformers/models/clipseg/modeling_clipseg.py index 6aebd11dbe..a24847471f 100644 --- a/src/transformers/models/clipseg/modeling_clipseg.py +++ b/src/transformers/models/clipseg/modeling_clipseg.py @@ -81,10 +81,10 @@ class CLIPSegOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -110,7 +110,7 @@ class CLIPSegDecoderOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -127,9 +127,9 @@ class CLIPSegImageSegmentationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - conditional_embeddings: torch.FloatTensor = None - pooled_output: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + conditional_embeddings: Optional[torch.FloatTensor] = None + pooled_output: Optional[torch.FloatTensor] = None vision_model_output: BaseModelOutputWithPooling = None decoder_output: CLIPSegDecoderOutput = None diff --git a/src/transformers/models/clvp/modeling_clvp.py b/src/transformers/models/clvp/modeling_clvp.py index a8feab0b1a..afbab29383 100644 --- a/src/transformers/models/clvp/modeling_clvp.py +++ b/src/transformers/models/clvp/modeling_clvp.py @@ -171,7 +171,7 @@ class ClvpEncoderOutput(ModelOutput): """ embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -211,15 +211,15 @@ class ClvpOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None speech_ids: Optional[torch.LongTensor] = None - logits_per_speech: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - speech_embeds: torch.FloatTensor = None + logits_per_speech: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + speech_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None speech_model_output: BaseModelOutputWithPooling = None - decoder_hidden_states: torch.FloatTensor = None - text_encoder_hidden_states: torch.FloatTensor = None - speech_encoder_hidden_states: torch.FloatTensor = None + decoder_hidden_states: Optional[torch.FloatTensor] = None + text_encoder_hidden_states: Optional[torch.FloatTensor] = None + speech_encoder_hidden_states: Optional[torch.FloatTensor] = None # Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm with Llama->Clvp @@ -1737,8 +1737,8 @@ class ClvpModelForConditionalGeneration(ClvpPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=ClvpOutput, config_class=ClvpConfig) def forward( self, - input_ids: torch.LongTensor = None, - input_features: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + input_features: Optional[torch.FloatTensor] = None, conditioning_encoder_inputs_embeds: Optional[torch.FloatTensor] = None, text_encoder_inputs_embeds: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.LongTensor] = None, @@ -1868,8 +1868,8 @@ class ClvpModelForConditionalGeneration(ClvpPreTrainedModel, GenerationMixin): @torch.no_grad() def generate( self, - input_ids: torch.LongTensor = None, - input_features: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + input_features: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.LongTensor] = None, generation_config: Optional[GenerationConfig] = None, pad_to_max_mel_tokens: Optional[int] = None, diff --git a/src/transformers/models/codegen/tokenization_codegen.py b/src/transformers/models/codegen/tokenization_codegen.py index 2b584e83b1..f55d5a3f15 100644 --- a/src/transformers/models/codegen/tokenization_codegen.py +++ b/src/transformers/models/codegen/tokenization_codegen.py @@ -344,7 +344,7 @@ class CodeGenTokenizer(PreTrainedTokenizer): self, token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, truncate_before_pattern: Optional[List[str]] = None, **kwargs, ) -> str: diff --git a/src/transformers/models/codegen/tokenization_codegen_fast.py b/src/transformers/models/codegen/tokenization_codegen_fast.py index 86782cf807..6ca74ff532 100644 --- a/src/transformers/models/codegen/tokenization_codegen_fast.py +++ b/src/transformers/models/codegen/tokenization_codegen_fast.py @@ -192,7 +192,7 @@ class CodeGenTokenizerFast(PreTrainedTokenizerFast): self, token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, truncate_before_pattern: Optional[List[str]] = None, **kwargs, ) -> str: diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py index eadc89697c..7d2af2bddd 100644 --- a/src/transformers/models/cohere/modeling_cohere.py +++ b/src/transformers/models/cohere/modeling_cohere.py @@ -550,7 +550,7 @@ class CohereModel(CoherePreTrainedModel): @add_start_docstrings_to_model_forward(COHERE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -827,7 +827,7 @@ class CohereForCausalLM(CoherePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/cohere/modular_cohere.py b/src/transformers/models/cohere/modular_cohere.py index 4c1ac5ff33..3b017a31fa 100644 --- a/src/transformers/models/cohere/modular_cohere.py +++ b/src/transformers/models/cohere/modular_cohere.py @@ -306,7 +306,7 @@ class CohereForCausalLM(LlamaForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/cohere2/modeling_cohere2.py b/src/transformers/models/cohere2/modeling_cohere2.py index 21489e9b78..600c14c1df 100644 --- a/src/transformers/models/cohere2/modeling_cohere2.py +++ b/src/transformers/models/cohere2/modeling_cohere2.py @@ -557,7 +557,7 @@ class Cohere2Model(Cohere2PreTrainedModel): @add_start_docstrings_to_model_forward(COHERE2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, @@ -813,7 +813,7 @@ class Cohere2ForCausalLM(Cohere2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/cohere2/modular_cohere2.py b/src/transformers/models/cohere2/modular_cohere2.py index d76a92accb..7c73f3e185 100644 --- a/src/transformers/models/cohere2/modular_cohere2.py +++ b/src/transformers/models/cohere2/modular_cohere2.py @@ -454,7 +454,7 @@ class Cohere2Model(Gemma2Model): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, diff --git a/src/transformers/models/colpali/modeling_colpali.py b/src/transformers/models/colpali/modeling_colpali.py index d84f29a341..4782bf7d30 100644 --- a/src/transformers/models/colpali/modeling_colpali.py +++ b/src/transformers/models/colpali/modeling_colpali.py @@ -111,7 +111,7 @@ class ColPaliForRetrievalOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - embeddings: torch.Tensor = None + embeddings: Optional[torch.Tensor] = None past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -191,8 +191,8 @@ class ColPaliForRetrieval(ColPaliPreTrainedModel): @replace_return_docstrings(output_type=ColPaliForRetrievalOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index 0c109f682e..923ccc20ab 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -948,7 +948,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -1264,7 +1264,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/conditional_detr/modeling_conditional_detr.py b/src/transformers/models/conditional_detr/modeling_conditional_detr.py index 5a839f9513..90b7b68bb4 100644 --- a/src/transformers/models/conditional_detr/modeling_conditional_detr.py +++ b/src/transformers/models/conditional_detr/modeling_conditional_detr.py @@ -178,8 +178,8 @@ class ConditionalDetrObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -248,9 +248,9 @@ class ConditionalDetrSegmentationOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -788,7 +788,7 @@ class ConditionalDetrEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - object_queries: torch.Tensor = None, + object_queries: Optional[torch.Tensor] = None, output_attentions: bool = False, ): """ diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index 9b2696a7e2..d9318f2b12 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -106,10 +106,10 @@ class TFConvBertEmbeddings(keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, past_key_values_length=0, training: bool = False, ) -> tf.Tensor: diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 35cbc91797..e6b3125167 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -90,7 +90,7 @@ class ConvNextImageProcessor(BaseImageProcessor): self, do_resize: bool = True, size: Dict[str, int] = None, - crop_pct: float = None, + crop_pct: Optional[float] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -187,13 +187,13 @@ class ConvNextImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, - crop_pct: float = None, + crop_pct: Optional[float] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py index 8eeb98b089..5c769926d2 100755 --- a/src/transformers/models/convnext/modeling_convnext.py +++ b/src/transformers/models/convnext/modeling_convnext.py @@ -347,7 +347,7 @@ class ConvNextModel(ConvNextPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]: @@ -413,7 +413,7 @@ class ConvNextForImageClassification(ConvNextPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/convnextv2/modeling_convnextv2.py b/src/transformers/models/convnextv2/modeling_convnextv2.py index 98e5ba1551..a9d8332ff0 100644 --- a/src/transformers/models/convnextv2/modeling_convnextv2.py +++ b/src/transformers/models/convnextv2/modeling_convnextv2.py @@ -367,7 +367,7 @@ class ConvNextV2Model(ConvNextV2PreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]: @@ -434,7 +434,7 @@ class ConvNextV2ForImageClassification(ConvNextV2PreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/cvt/modeling_cvt.py b/src/transformers/models/cvt/modeling_cvt.py index 45bb6aa49f..c2ca9eab5a 100644 --- a/src/transformers/models/cvt/modeling_cvt.py +++ b/src/transformers/models/cvt/modeling_cvt.py @@ -60,8 +60,8 @@ class BaseModelOutputWithCLSToken(ModelOutput): plus the initial embedding outputs. """ - last_hidden_state: torch.FloatTensor = None - cls_token_value: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + cls_token_value: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py index fa9a4d9a3a..74202d8806 100644 --- a/src/transformers/models/cvt/modeling_tf_cvt.py +++ b/src/transformers/models/cvt/modeling_tf_cvt.py @@ -65,8 +65,8 @@ class TFBaseModelOutputWithCLSToken(ModelOutput): the initial embedding outputs. """ - last_hidden_state: tf.Tensor = None - cls_token_value: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + cls_token_value: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/dab_detr/modeling_dab_detr.py b/src/transformers/models/dab_detr/modeling_dab_detr.py index 3e3294db07..84d6f276a8 100644 --- a/src/transformers/models/dab_detr/modeling_dab_detr.py +++ b/src/transformers/models/dab_detr/modeling_dab_detr.py @@ -174,8 +174,8 @@ class DabDetrObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/dac/modeling_dac.py b/src/transformers/models/dac/modeling_dac.py index 48e213693b..47a45f44e9 100644 --- a/src/transformers/models/dac/modeling_dac.py +++ b/src/transformers/models/dac/modeling_dac.py @@ -53,11 +53,11 @@ class DacOutput(ModelOutput): Projected latents (continuous representation of input before quantization). """ - loss: torch.FloatTensor = None - audio_values: torch.FloatTensor = None - quantized_representation: torch.FloatTensor = None - audio_codes: torch.LongTensor = None - projected_latents: torch.FloatTensor = None + loss: Optional[torch.FloatTensor] = None + audio_values: Optional[torch.FloatTensor] = None + quantized_representation: Optional[torch.FloatTensor] = None + audio_codes: Optional[torch.LongTensor] = None + projected_latents: Optional[torch.FloatTensor] = None @dataclass @@ -74,10 +74,10 @@ class DacEncoderOutput(ModelOutput): Projected latents (continuous representation of input before quantization). """ - loss: torch.FloatTensor = None - quantized_representation: torch.FloatTensor = None - audio_codes: torch.FloatTensor = None - projected_latents: torch.FloatTensor = None + loss: Optional[torch.FloatTensor] = None + quantized_representation: Optional[torch.FloatTensor] = None + audio_codes: Optional[torch.FloatTensor] = None + projected_latents: Optional[torch.FloatTensor] = None @dataclass @@ -89,7 +89,7 @@ class DacDecoderOutput(ModelOutput): Decoded audio values, obtained using the decoder part of Dac. """ - audio_values: torch.FloatTensor = None + audio_values: Optional[torch.FloatTensor] = None class Snake1d(nn.Module): diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 56056c4313..813fad89dc 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -90,8 +90,8 @@ class TFData2VecVisionModelOutputWithPooling(TFBaseModelOutputWithPooling): heads. """ - last_hidden_state: tf.Tensor = None - pooler_output: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index 5303f84edc..258023c60b 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -744,7 +744,7 @@ class DbrxBlock(nn.Module): self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - position_ids: torch.LongTensor = None, + position_ids: Optional[torch.LongTensor] = None, past_key_value: Optional[Cache] = None, output_attentions: Optional[bool] = False, output_router_logits: Optional[bool] = False, diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 6a8b233978..cad1517113 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -207,9 +207,9 @@ class TFDebertaAttention(keras.layers.Layer): self, input_tensor: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -318,9 +318,9 @@ class TFDebertaLayer(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -408,8 +408,8 @@ class TFDebertaEncoder(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -650,9 +650,9 @@ class TFDebertaDisentangledSelfAttention(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -880,11 +880,11 @@ class TFDebertaEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, - mask: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, + mask: Optional[tf.Tensor] = None, training: bool = False, ) -> tf.Tensor: """ diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index b7b5a01d17..899564eef0 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -192,9 +192,9 @@ class TFDebertaV2Attention(keras.layers.Layer): self, input_tensor: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -306,9 +306,9 @@ class TFDebertaV2Layer(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -485,8 +485,8 @@ class TFDebertaV2Encoder(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, output_attentions: bool = False, output_hidden_states: bool = False, return_dict: bool = True, @@ -718,9 +718,9 @@ class TFDebertaV2DisentangledSelfAttention(keras.layers.Layer): self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, - query_states: tf.Tensor = None, - relative_pos: tf.Tensor = None, - rel_embeddings: tf.Tensor = None, + query_states: Optional[tf.Tensor] = None, + relative_pos: Optional[tf.Tensor] = None, + rel_embeddings: Optional[tf.Tensor] = None, output_attentions: bool = False, training: bool = False, ) -> Tuple[tf.Tensor]: @@ -985,11 +985,11 @@ class TFDebertaV2Embeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, - mask: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, + mask: Optional[tf.Tensor] = None, training: bool = False, ) -> tf.Tensor: """ diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py index faf24729d6..ab8c190775 100755 --- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py +++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py @@ -732,12 +732,12 @@ class DecisionTransformerOutput(ModelOutput): heads. """ - state_preds: torch.FloatTensor = None - action_preds: torch.FloatTensor = None - return_preds: torch.FloatTensor = None - hidden_states: torch.FloatTensor = None - attentions: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + state_preds: Optional[torch.FloatTensor] = None + action_preds: Optional[torch.FloatTensor] = None + return_preds: Optional[torch.FloatTensor] = None + hidden_states: Optional[torch.FloatTensor] = None + attentions: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None class DecisionTransformerPreTrainedModel(PreTrainedModel): diff --git a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py index 5a09e85779..2e2ddc2d13 100644 --- a/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py +++ b/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py @@ -698,7 +698,7 @@ class DeepseekV3Model(DeepseekV3PreTrainedModel): @add_start_docstrings_to_model_forward(DEEPSEEK_V3_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -973,7 +973,7 @@ class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index e44733be81..0a7dd1b06d 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -946,7 +946,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -1262,7 +1262,7 @@ class DeformableDetrImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py index 27c9aaa371..8f78d8a7bf 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py @@ -346,7 +346,7 @@ class DeformableDetrImageProcessorFast(BaseImageProcessorFast): image: torch.Tensor, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: diff --git a/src/transformers/models/deformable_detr/modeling_deformable_detr.py b/src/transformers/models/deformable_detr/modeling_deformable_detr.py index 007129c5bd..4e177dde1a 100755 --- a/src/transformers/models/deformable_detr/modeling_deformable_detr.py +++ b/src/transformers/models/deformable_detr/modeling_deformable_detr.py @@ -140,9 +140,9 @@ class DeformableDetrDecoderOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -192,10 +192,10 @@ class DeformableDetrModelOutput(ModelOutput): Logits of predicted bounding boxes coordinates in the first stage. """ - init_reference_points: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + init_reference_points: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -269,8 +269,8 @@ class DeformableDetrObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None init_reference_points: Optional[torch.FloatTensor] = None last_hidden_state: Optional[torch.FloatTensor] = None @@ -785,7 +785,7 @@ class DeformableDetrEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, reference_points=None, spatial_shapes=None, spatial_shapes_list=None, diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py index 770a1b2ec8..d1eceebcb5 100644 --- a/src/transformers/models/deit/image_processing_deit.py +++ b/src/transformers/models/deit/image_processing_deit.py @@ -163,14 +163,14 @@ class DeiTImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample=None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py index b8cae05f1f..cb88bca353 100644 --- a/src/transformers/models/deit/modeling_deit.py +++ b/src/transformers/models/deit/modeling_deit.py @@ -905,9 +905,9 @@ class DeiTForImageClassificationWithTeacherOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None - cls_logits: torch.FloatTensor = None - distillation_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + cls_logits: Optional[torch.FloatTensor] = None + distillation_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py index 48725cd816..49c9526803 100644 --- a/src/transformers/models/deit/modeling_tf_deit.py +++ b/src/transformers/models/deit/modeling_tf_deit.py @@ -88,9 +88,9 @@ class TFDeiTForImageClassificationWithTeacherOutput(ModelOutput): the self-attention heads. """ - logits: tf.Tensor = None - cls_logits: tf.Tensor = None - distillation_logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None + cls_logits: Optional[tf.Tensor] = None + distillation_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/deprecated/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py index e59b7bd95b..0cfdc03e81 100644 --- a/src/transformers/models/deprecated/deta/image_processing_deta.py +++ b/src/transformers/models/deprecated/deta/image_processing_deta.py @@ -593,7 +593,7 @@ class DetaImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -889,7 +889,7 @@ class DetaImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[List[Dict], List[List[Dict]]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deprecated/deta/modeling_deta.py b/src/transformers/models/deprecated/deta/modeling_deta.py index 075b490cfa..a37fb60a87 100644 --- a/src/transformers/models/deprecated/deta/modeling_deta.py +++ b/src/transformers/models/deprecated/deta/modeling_deta.py @@ -178,9 +178,9 @@ class DetaDecoderOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -232,10 +232,10 @@ class DetaModelOutput(ModelOutput): Logits of proposal bounding boxes coordinates in the gen_encoder_output_proposals. """ - init_reference_points: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + init_reference_points: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -312,8 +312,8 @@ class DetaObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None init_reference_points: Optional[torch.FloatTensor] = None last_hidden_state: Optional[torch.FloatTensor] = None @@ -843,7 +843,7 @@ class DetaEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, reference_points=None, spatial_shapes=None, level_start_index=None, diff --git a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py index f99ac6c324..1c42759ed2 100644 --- a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py @@ -181,7 +181,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor): do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py index 306790021a..f86656c0b1 100644 --- a/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py @@ -717,9 +717,9 @@ class EfficientFormerForImageClassificationWithTeacherOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None - cls_logits: torch.FloatTensor = None - distillation_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + cls_logits: Optional[torch.FloatTensor] = None + distillation_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py b/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py index d47d06e783..76fdaa1f08 100644 --- a/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py @@ -1087,9 +1087,9 @@ class TFEfficientFormerForImageClassificationWithTeacherOutput(ModelOutput): the self-attention heads. """ - logits: tf.Tensor = None - cls_logits: tf.Tensor = None - distillation_logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None + cls_logits: Optional[tf.Tensor] = None + distillation_logits: Optional[tf.Tensor] = None hidden_states: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[tf.Tensor]] = None diff --git a/src/transformers/models/deprecated/graphormer/configuration_graphormer.py b/src/transformers/models/deprecated/graphormer/configuration_graphormer.py index e82eaa75b9..e32a853ae1 100644 --- a/src/transformers/models/deprecated/graphormer/configuration_graphormer.py +++ b/src/transformers/models/deprecated/graphormer/configuration_graphormer.py @@ -155,7 +155,7 @@ class GraphormerConfig(PretrainedConfig): pre_layernorm: bool = False, apply_graphormer_init: bool = False, activation_fn: str = "gelu", - embed_scale: float = None, + embed_scale: Optional[float] = None, freeze_embeddings: bool = False, num_trans_layers_to_freeze: int = 0, traceable: bool = False, diff --git a/src/transformers/models/deprecated/nat/modeling_nat.py b/src/transformers/models/deprecated/nat/modeling_nat.py index b3827f3787..0a59c827cd 100644 --- a/src/transformers/models/deprecated/nat/modeling_nat.py +++ b/src/transformers/models/deprecated/nat/modeling_nat.py @@ -97,7 +97,7 @@ class NatEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -132,7 +132,7 @@ class NatModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -169,7 +169,7 @@ class NatImageClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/deprecated/nezha/modeling_nezha.py b/src/transformers/models/deprecated/nezha/modeling_nezha.py index 3346a4f835..1f76a21771 100644 --- a/src/transformers/models/deprecated/nezha/modeling_nezha.py +++ b/src/transformers/models/deprecated/nezha/modeling_nezha.py @@ -760,8 +760,8 @@ class NezhaForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py index 98bc7fb70a..4b3f07d7a8 100644 --- a/src/transformers/models/deprecated/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -552,7 +552,7 @@ class OpenLlamaModel(OpenLlamaPreTrainedModel): @add_start_docstrings_to_model_forward(OPEN_LLAMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -710,7 +710,7 @@ class OpenLlamaForCausalLM(OpenLlamaPreTrainedModel): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -883,7 +883,7 @@ class OpenLlamaForSequenceClassification(OpenLlamaPreTrainedModel): @add_start_docstrings_to_model_forward(OPEN_LLAMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/deprecated/realm/modeling_realm.py b/src/transformers/models/deprecated/realm/modeling_realm.py index d5bf922f42..b518849ced 100644 --- a/src/transformers/models/deprecated/realm/modeling_realm.py +++ b/src/transformers/models/deprecated/realm/modeling_realm.py @@ -663,7 +663,7 @@ class RealmEmbedderOutput(ModelOutput): heads. """ - projected_score: torch.FloatTensor = None + projected_score: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -682,9 +682,9 @@ class RealmScorerOutput(ModelOutput): Candidate score derived from the embedder. """ - relevance_score: torch.FloatTensor = None - query_score: torch.FloatTensor = None - candidate_score: torch.FloatTensor = None + relevance_score: Optional[torch.FloatTensor] = None + query_score: Optional[torch.FloatTensor] = None + candidate_score: Optional[torch.FloatTensor] = None @dataclass @@ -724,13 +724,13 @@ class RealmReaderOutput(ModelOutput): heads. """ - loss: torch.FloatTensor = None - retriever_loss: torch.FloatTensor = None - reader_loss: torch.FloatTensor = None + loss: Optional[torch.FloatTensor] = None + retriever_loss: Optional[torch.FloatTensor] = None + reader_loss: Optional[torch.FloatTensor] = None retriever_correct: torch.BoolTensor = None reader_correct: torch.BoolTensor = None - block_idx: torch.LongTensor = None - candidate: torch.LongTensor = None + block_idx: Optional[torch.LongTensor] = None + candidate: Optional[torch.LongTensor] = None start_pos: torch.int32 = None end_pos: torch.int32 = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -751,7 +751,7 @@ class RealmForOpenQAOutput(ModelOutput): """ reader_output: dict = None - predicted_answer_ids: torch.LongTensor = None + predicted_answer_ids: Optional[torch.LongTensor] = None class RealmPredictionHeadTransform(nn.Module): diff --git a/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py b/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py index 5bb787b87d..52afb77885 100644 --- a/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py +++ b/src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py @@ -140,7 +140,7 @@ class TrajectoryTransformerOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py index 3a0f9c5ca4..496638e5f2 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py @@ -690,7 +690,7 @@ class TFTransfoXLModelOutput(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None mems: List[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -723,7 +723,7 @@ class TFTransfoXLLMHeadModelOutput(ModelOutput): heads. """ - prediction_scores: tf.Tensor = None + prediction_scores: Optional[tf.Tensor] = None mems: List[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -757,7 +757,7 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py index cbab6f2108..abe7e59927 100644 --- a/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py @@ -651,7 +651,7 @@ class TransfoXLSequenceClassifierOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None mems: List[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -687,7 +687,7 @@ class TransfoXLLMHeadModelOutput(ModelOutput): """ losses: Optional[torch.FloatTensor] = None - prediction_scores: torch.FloatTensor = None + prediction_scores: Optional[torch.FloatTensor] = None mems: List[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py index bde5830a51..2c5f853b2a 100644 --- a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py @@ -220,14 +220,14 @@ class TvltImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, @@ -277,16 +277,16 @@ class TvltImageProcessor(BaseImageProcessor): def preprocess( self, videos: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, patch_size: List[int] = None, num_frames: Optional[int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, is_mixed: bool = False, diff --git a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py index aab3d4ff2d..561b7f90d1 100644 --- a/src/transformers/models/deprecated/tvlt/modeling_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/modeling_tvlt.py @@ -75,13 +75,13 @@ class TvltModelOutput(ModelOutput): the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None - last_pixel_hidden_state: torch.FloatTensor = None - last_audio_hidden_state: torch.FloatTensor = None - pixel_label_masks: torch.LongTensor = None - audio_label_masks: torch.LongTensor = None - pixel_ids_restore: torch.LongTensor = None - audio_ids_restore: torch.LongTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + last_pixel_hidden_state: Optional[torch.FloatTensor] = None + last_audio_hidden_state: Optional[torch.FloatTensor] = None + pixel_label_masks: Optional[torch.LongTensor] = None + audio_label_masks: Optional[torch.LongTensor] = None + pixel_ids_restore: Optional[torch.LongTensor] = None + audio_ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -104,7 +104,7 @@ class TvltDecoderOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -136,9 +136,9 @@ class TvltForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - matching_logits: torch.FloatTensor = None - pixel_logits: torch.FloatTensor = None - audio_logits: torch.FloatTensor = None + matching_logits: Optional[torch.FloatTensor] = None + pixel_logits: Optional[torch.FloatTensor] = None + audio_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index 2dbb1d0202..c78790f134 100644 --- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -192,17 +192,17 @@ class ViTHybridImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py index 35c089599b..cf1abf5bba 100644 --- a/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py +++ b/src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py @@ -308,7 +308,7 @@ class XLMProphetNetSeq2SeqLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None logits_ngram: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -528,7 +528,7 @@ class XLMProphetNetDecoderLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None logits_ngram: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/depth_pro/modeling_depth_pro.py b/src/transformers/models/depth_pro/modeling_depth_pro.py index 67715723d1..c26bf484f5 100644 --- a/src/transformers/models/depth_pro/modeling_depth_pro.py +++ b/src/transformers/models/depth_pro/modeling_depth_pro.py @@ -62,7 +62,7 @@ class DepthProOutput(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None features: Union[torch.FloatTensor, List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -94,7 +94,7 @@ class DepthProDepthEstimatorOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - predicted_depth: torch.FloatTensor = None + predicted_depth: Optional[torch.FloatTensor] = None field_of_view: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 7eb51a01de..b2677af859 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -928,7 +928,7 @@ class DetrImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -1237,7 +1237,7 @@ class DetrImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index 16bef79e59..b6227ce5c5 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -371,7 +371,7 @@ class DetrImageProcessorFast(BaseImageProcessorFast): image: torch.Tensor, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 0b006c44ad..cb47f58bda 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -169,8 +169,8 @@ class DetrObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -238,9 +238,9 @@ class DetrSegmentationOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -632,7 +632,7 @@ class DetrEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - object_queries: torch.Tensor = None, + object_queries: Optional[torch.Tensor] = None, output_attentions: bool = False, ): """ diff --git a/src/transformers/models/diffllama/modeling_diffllama.py b/src/transformers/models/diffllama/modeling_diffllama.py index b25c19384e..07ad84cb07 100644 --- a/src/transformers/models/diffllama/modeling_diffllama.py +++ b/src/transformers/models/diffllama/modeling_diffllama.py @@ -794,7 +794,7 @@ class DiffLlamaModel(DiffLlamaPreTrainedModel): @add_start_docstrings_to_model_forward(DIFFLLAMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1069,7 +1069,7 @@ class DiffLlamaForCausalLM(DiffLlamaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py index 69677e4064..0e0121b78d 100644 --- a/src/transformers/models/dinat/modeling_dinat.py +++ b/src/transformers/models/dinat/modeling_dinat.py @@ -97,7 +97,7 @@ class DinatEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -132,7 +132,7 @@ class DinatModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -169,7 +169,7 @@ class DinatImageClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index a10e2846cb..239bc54db2 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -299,16 +299,16 @@ class DonutImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_thumbnail: bool = None, - do_align_long_axis: bool = None, - do_pad: bool = None, + do_thumbnail: Optional[bool] = None, + do_align_long_axis: Optional[bool] = None, + do_pad: Optional[bool] = None, random_padding: bool = False, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/donut/modeling_donut_swin.py b/src/transformers/models/donut/modeling_donut_swin.py index 929d730884..0d44069fc8 100644 --- a/src/transformers/models/donut/modeling_donut_swin.py +++ b/src/transformers/models/donut/modeling_donut_swin.py @@ -78,7 +78,7 @@ class DonutSwinEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -114,7 +114,7 @@ class DonutSwinModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py index 79317202b8..3ff4aa1152 100644 --- a/src/transformers/models/dpr/modeling_dpr.py +++ b/src/transformers/models/dpr/modeling_dpr.py @@ -127,8 +127,8 @@ class DPRReaderOutput(ModelOutput): """ start_logits: torch.FloatTensor - end_logits: torch.FloatTensor = None - relevance_logits: torch.FloatTensor = None + end_logits: Optional[torch.FloatTensor] = None + relevance_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 49a750fa4f..303b03ec24 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -18,7 +18,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -68,7 +68,7 @@ class TFDPRContextEncoderOutput(ModelOutput): heads. """ - pooler_output: tf.Tensor = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -96,7 +96,7 @@ class TFDPRQuestionEncoderOutput(ModelOutput): heads. """ - pooler_output: tf.Tensor = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -127,9 +127,9 @@ class TFDPRReaderOutput(ModelOutput): heads. """ - start_logits: tf.Tensor = None - end_logits: tf.Tensor = None - relevance_logits: tf.Tensor = None + start_logits: Optional[tf.Tensor] = None + end_logits: Optional[tf.Tensor] = None + relevance_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -155,13 +155,13 @@ class TFDPREncoderLayer(keras.layers.Layer): @unpack_inputs def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None, - output_attentions: bool = None, - output_hidden_states: bool = None, - return_dict: bool = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, training: bool = False, ) -> Union[TFBaseModelOutputWithPooling, Tuple[tf.Tensor, ...]]: outputs = self.bert_model( @@ -226,7 +226,7 @@ class TFDPRSpanPredictorLayer(keras.layers.Layer): @unpack_inputs def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None, output_attentions: bool = False, @@ -296,7 +296,7 @@ class TFDPRSpanPredictor(TFPreTrainedModel): @unpack_inputs def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None, @@ -329,7 +329,7 @@ class TFDPREncoder(TFPreTrainedModel): @unpack_inputs def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: tf.Tensor | None = None, token_type_ids: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None, diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 72d77edf9a..d034ff0a4f 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -294,18 +294,18 @@ class DPTImageProcessor(BaseImageProcessor): def _preprocess( self, image: ImageInput, - do_reduce_labels: bool = None, - do_resize: bool = None, + do_reduce_labels: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - keep_aspect_ratio: bool = None, + keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_pad: bool = None, + do_pad: Optional[bool] = None, size_divisor: Optional[int] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -336,17 +336,17 @@ class DPTImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - keep_aspect_ratio: bool = None, + keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_pad: bool = None, + do_pad: Optional[bool] = None, size_divisor: Optional[int] = None, data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -387,12 +387,12 @@ class DPTImageProcessor(BaseImageProcessor): def _preprocess_segmentation_map( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - keep_aspect_ratio: bool = None, + keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, - do_reduce_labels: bool = None, + do_reduce_labels: Optional[bool] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): """Preprocesses a single segmentation map.""" @@ -436,17 +436,17 @@ class DPTImageProcessor(BaseImageProcessor): self, images: ImageInput, segmentation_maps: Optional[ImageInput] = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Optional[int] = None, - keep_aspect_ratio: bool = None, + keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_pad: bool = None, + do_pad: Optional[bool] = None, size_divisor: Optional[int] = None, do_reduce_labels: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py index c9bbaa1716..c69bf618fe 100755 --- a/src/transformers/models/dpt/modeling_dpt.py +++ b/src/transformers/models/dpt/modeling_dpt.py @@ -66,7 +66,7 @@ class BaseModelOutputWithIntermediateActivations(ModelOutput): Intermediate activations that can be used to compute hidden states of the model at various layers. """ - last_hidden_states: torch.FloatTensor = None + last_hidden_states: Optional[torch.FloatTensor] = None intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -99,8 +99,8 @@ class BaseModelOutputWithPoolingAndIntermediateActivations(ModelOutput): Intermediate activations that can be used to compute hidden states of the model at various layers. """ - last_hidden_state: torch.FloatTensor = None - pooler_output: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 79f92ec1ca..612ede7086 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -212,18 +212,18 @@ class EfficientNetImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample=None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - rescale_offset: bool = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + rescale_offset: Optional[bool] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - include_top: bool = None, + include_top: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/efficientnet/modeling_efficientnet.py b/src/transformers/models/efficientnet/modeling_efficientnet.py index 0ab5fa2e6a..9e0b890729 100644 --- a/src/transformers/models/efficientnet/modeling_efficientnet.py +++ b/src/transformers/models/efficientnet/modeling_efficientnet.py @@ -527,7 +527,7 @@ class EfficientNetModel(EfficientNetPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]: @@ -591,7 +591,7 @@ class EfficientNetForImageClassification(EfficientNetPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 3921ca5079..7b73f02212 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -711,7 +711,7 @@ class ElectraForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 827241d0a8..6dc3ac8ebf 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -601,10 +601,10 @@ class TFElectraEmbeddings(keras.layers.Layer): # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, past_key_values_length=0, training: bool = False, ) -> tf.Tensor: @@ -931,7 +931,7 @@ class TFElectraForPreTrainingOutput(ModelOutput): heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/emu3/image_processing_emu3.py b/src/transformers/models/emu3/image_processing_emu3.py index 1cc02f58dd..a63269c99e 100644 --- a/src/transformers/models/emu3/image_processing_emu3.py +++ b/src/transformers/models/emu3/image_processing_emu3.py @@ -167,14 +167,14 @@ class Emu3ImageProcessor(BaseImageProcessor): def _preprocess( self, images: Union[ImageInput, VideoInput], - do_resize: bool = None, + do_resize: Optional[bool] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -308,15 +308,15 @@ class Emu3ImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, do_pad: bool = True, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, diff --git a/src/transformers/models/emu3/modeling_emu3.py b/src/transformers/models/emu3/modeling_emu3.py index e013e86632..80b17541b9 100644 --- a/src/transformers/models/emu3/modeling_emu3.py +++ b/src/transformers/models/emu3/modeling_emu3.py @@ -706,7 +706,7 @@ class Emu3VQVAEMiddleBlock(nn.Module): quant_channels=quant_channels, ) - def forward(self, hidden_states: torch.FloatTensor, quant_states: torch.FloatTensor = None): + def forward(self, hidden_states: torch.FloatTensor, quant_states: Optional[torch.FloatTensor] = None): hidden_states = self.block_1(hidden_states, quant_states) residual = hidden_states hidden_states = self.attn_norm(hidden_states, quant_states) @@ -1379,7 +1379,7 @@ class Emu3TextModel(Emu3PreTrainedModel): @add_start_docstrings_to_model_forward(EMU3_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1655,7 +1655,7 @@ class Emu3ForCausalLM(Emu3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="Emu3TextConfig") def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -1875,9 +1875,9 @@ class Emu3ForConditionalGeneration(Emu3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/emu3/modular_emu3.py b/src/transformers/models/emu3/modular_emu3.py index 8af5ec700a..031dc26f0a 100644 --- a/src/transformers/models/emu3/modular_emu3.py +++ b/src/transformers/models/emu3/modular_emu3.py @@ -436,7 +436,7 @@ class Emu3VQVAEMiddleBlock(nn.Module): quant_channels=quant_channels, ) - def forward(self, hidden_states: torch.FloatTensor, quant_states: torch.FloatTensor = None): + def forward(self, hidden_states: torch.FloatTensor, quant_states: Optional[torch.FloatTensor] = None): hidden_states = self.block_1(hidden_states, quant_states) residual = hidden_states hidden_states = self.attn_norm(hidden_states, quant_states) @@ -1175,9 +1175,9 @@ class Emu3ForConditionalGeneration(Emu3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - image_sizes: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + image_sizes: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/encodec/feature_extraction_encodec.py b/src/transformers/models/encodec/feature_extraction_encodec.py index 3f83c660b2..f33191862e 100644 --- a/src/transformers/models/encodec/feature_extraction_encodec.py +++ b/src/transformers/models/encodec/feature_extraction_encodec.py @@ -57,8 +57,8 @@ class EncodecFeatureExtractor(SequenceFeatureExtractor): feature_size: int = 1, sampling_rate: int = 24000, padding_value: float = 0.0, - chunk_length_s: float = None, - overlap: float = None, + chunk_length_s: Optional[float] = None, + overlap: Optional[float] = None, **kwargs, ): super().__init__(feature_size=feature_size, sampling_rate=sampling_rate, padding_value=padding_value, **kwargs) diff --git a/src/transformers/models/encodec/modeling_encodec.py b/src/transformers/models/encodec/modeling_encodec.py index e2b279ca67..670ac99e03 100644 --- a/src/transformers/models/encodec/modeling_encodec.py +++ b/src/transformers/models/encodec/modeling_encodec.py @@ -50,8 +50,8 @@ class EncodecOutput(ModelOutput): Decoded audio values, obtained using the decoder part of Encodec. """ - audio_codes: torch.LongTensor = None - audio_values: torch.FloatTensor = None + audio_codes: Optional[torch.LongTensor] = None + audio_values: Optional[torch.FloatTensor] = None @dataclass @@ -64,8 +64,8 @@ class EncodecEncoderOutput(ModelOutput): Scaling factor for each `audio_codes` input. This is used to unscale each chunk of audio when decoding. """ - audio_codes: torch.LongTensor = None - audio_scales: torch.FloatTensor = None + audio_codes: Optional[torch.LongTensor] = None + audio_scales: Optional[torch.FloatTensor] = None @dataclass @@ -76,7 +76,7 @@ class EncodecDecoderOutput(ModelOutput): Decoded audio values, obtained using the decoder part of Encodec. """ - audio_values: torch.FloatTensor = None + audio_values: Optional[torch.FloatTensor] = None class EncodecConv1d(nn.Module): @@ -589,7 +589,7 @@ class EncodecModel(EncodecPreTrainedModel): def encode( self, input_values: torch.Tensor, - padding_mask: torch.Tensor = None, + padding_mask: Optional[torch.Tensor] = None, bandwidth: Optional[float] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple[torch.Tensor, Optional[torch.Tensor]], EncodecEncoderOutput]: diff --git a/src/transformers/models/ernie/modeling_ernie.py b/src/transformers/models/ernie/modeling_ernie.py index 975466f551..559078b1ef 100644 --- a/src/transformers/models/ernie/modeling_ernie.py +++ b/src/transformers/models/ernie/modeling_ernie.py @@ -700,8 +700,8 @@ class ErnieForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/esm/modeling_esmfold.py b/src/transformers/models/esm/modeling_esmfold.py index d06f9a7d43..645c9d16a5 100644 --- a/src/transformers/models/esm/modeling_esmfold.py +++ b/src/transformers/models/esm/modeling_esmfold.py @@ -112,29 +112,29 @@ class EsmForProteinFoldingOutput(ModelOutput): Per-sample maximum predicted error. """ - frames: torch.FloatTensor = None - sidechain_frames: torch.FloatTensor = None - unnormalized_angles: torch.FloatTensor = None - angles: torch.FloatTensor = None - positions: torch.FloatTensor = None - states: torch.FloatTensor = None - s_s: torch.FloatTensor = None - s_z: torch.FloatTensor = None - distogram_logits: torch.FloatTensor = None - lm_logits: torch.FloatTensor = None - aatype: torch.FloatTensor = None - atom14_atom_exists: torch.FloatTensor = None - residx_atom14_to_atom37: torch.FloatTensor = None - residx_atom37_to_atom14: torch.FloatTensor = None - atom37_atom_exists: torch.FloatTensor = None - residue_index: torch.FloatTensor = None - lddt_head: torch.FloatTensor = None - plddt: torch.FloatTensor = None - ptm_logits: torch.FloatTensor = None - ptm: torch.FloatTensor = None - aligned_confidence_probs: torch.FloatTensor = None - predicted_aligned_error: torch.FloatTensor = None - max_predicted_aligned_error: torch.FloatTensor = None + frames: Optional[torch.FloatTensor] = None + sidechain_frames: Optional[torch.FloatTensor] = None + unnormalized_angles: Optional[torch.FloatTensor] = None + angles: Optional[torch.FloatTensor] = None + positions: Optional[torch.FloatTensor] = None + states: Optional[torch.FloatTensor] = None + s_s: Optional[torch.FloatTensor] = None + s_z: Optional[torch.FloatTensor] = None + distogram_logits: Optional[torch.FloatTensor] = None + lm_logits: Optional[torch.FloatTensor] = None + aatype: Optional[torch.FloatTensor] = None + atom14_atom_exists: Optional[torch.FloatTensor] = None + residx_atom14_to_atom37: Optional[torch.FloatTensor] = None + residx_atom37_to_atom14: Optional[torch.FloatTensor] = None + atom37_atom_exists: Optional[torch.FloatTensor] = None + residue_index: Optional[torch.FloatTensor] = None + lddt_head: Optional[torch.FloatTensor] = None + plddt: Optional[torch.FloatTensor] = None + ptm_logits: Optional[torch.FloatTensor] = None + ptm: Optional[torch.FloatTensor] = None + aligned_confidence_probs: Optional[torch.FloatTensor] = None + predicted_aligned_error: Optional[torch.FloatTensor] = None + max_predicted_aligned_error: Optional[torch.FloatTensor] = None ESMFOLD_INPUTS_DOCSTRING = r""" diff --git a/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py index 81c1eef895..590786b195 100644 --- a/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py @@ -78,15 +78,15 @@ class FastSpeech2ConformerModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - spectrogram: torch.FloatTensor = None + spectrogram: Optional[torch.FloatTensor] = None encoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None - duration_outputs: torch.LongTensor = None - pitch_outputs: torch.FloatTensor = None - energy_outputs: torch.FloatTensor = None + duration_outputs: Optional[torch.LongTensor] = None + pitch_outputs: Optional[torch.FloatTensor] = None + energy_outputs: Optional[torch.FloatTensor] = None @dataclass @@ -133,7 +133,7 @@ class FastSpeech2ConformerWithHifiGanOutput(FastSpeech2ConformerModelOutput): Outputs of the energy predictor. """ - waveform: torch.FloatTensor = None + waveform: Optional[torch.FloatTensor] = None _CONFIG_FOR_DOC = "FastSpeech2ConformerConfig" diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index a080068155..43ce980aa7 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -808,7 +808,7 @@ class TFFlaubertWithLMHeadModelOutput(ModelOutput): heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index 960c8189ae..3b8b8128c8 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -63,7 +63,7 @@ class FlavaMaskingGenerator: mask_group_max_patches: Optional[int] = None, mask_group_min_patches: int = 16, mask_group_min_aspect_ratio: Optional[float] = 0.3, - mask_group_max_aspect_ratio: float = None, + mask_group_max_aspect_ratio: Optional[float] = None, ): if not isinstance(input_size, tuple): input_size = (input_size,) * 2 @@ -246,7 +246,7 @@ class FlavaImageProcessor(BaseImageProcessor): # Codebook related params return_codebook_pixels: bool = False, codebook_do_resize: bool = True, - codebook_size: bool = None, + codebook_size: Optional[bool] = None, codebook_resample: int = PILImageResampling.LANCZOS, codebook_do_center_crop: bool = True, codebook_crop_size: Optional[int] = None, @@ -389,17 +389,17 @@ class FlavaImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_map_pixels: bool = None, + do_map_pixels: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[ChannelDimension] = None, ) -> np.ndarray: diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py index 330f7c4e7b..74076eddf2 100644 --- a/src/transformers/models/flava/modeling_flava.py +++ b/src/transformers/models/flava/modeling_flava.py @@ -1803,7 +1803,7 @@ class FlavaForPreTraining(FlavaPreTrainedModel): bool_masked_pos: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, image_attention_mask: Optional[torch.Tensor] = None, - skip_unmasked_multimodal_encoder: bool = None, + skip_unmasked_multimodal_encoder: Optional[bool] = None, mlm_labels: Optional[torch.Tensor] = None, mim_labels: Optional[torch.Tensor] = None, itm_labels: Optional[torch.Tensor] = None, diff --git a/src/transformers/models/fnet/modeling_fnet.py b/src/transformers/models/fnet/modeling_fnet.py index 9d02d35210..63aaa42e9d 100755 --- a/src/transformers/models/fnet/modeling_fnet.py +++ b/src/transformers/models/fnet/modeling_fnet.py @@ -444,8 +444,8 @@ class FNetForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/fnet/tokenization_fnet.py b/src/transformers/models/fnet/tokenization_fnet.py index 877a50cc2d..c113a505ef 100644 --- a/src/transformers/models/fnet/tokenization_fnet.py +++ b/src/transformers/models/fnet/tokenization_fnet.py @@ -221,7 +221,7 @@ class FNetTokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, spaces_between_special_tokens: bool = False, **kwargs, ) -> str: diff --git a/src/transformers/models/focalnet/modeling_focalnet.py b/src/transformers/models/focalnet/modeling_focalnet.py index a5cf2981b1..143d4e066b 100644 --- a/src/transformers/models/focalnet/modeling_focalnet.py +++ b/src/transformers/models/focalnet/modeling_focalnet.py @@ -75,7 +75,7 @@ class FocalNetEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -103,7 +103,7 @@ class FocalNetModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -133,7 +133,7 @@ class FocalNetMaskedImageModelingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - reconstruction: torch.FloatTensor = None + reconstruction: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -162,7 +162,7 @@ class FocalNetImageClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 8bbdf19550..df53e2bb67 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -482,7 +482,7 @@ class FSMTEncoder(nn.Module): self, input_ids: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - inputs_embeds: torch.Tensor = None, + inputs_embeds: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, output_attentions: bool = False, output_hidden_states: bool = False, diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py index 0d2f689da8..c5fecd0cc7 100644 --- a/src/transformers/models/funnel/modeling_funnel.py +++ b/src/transformers/models/funnel/modeling_funnel.py @@ -841,7 +841,7 @@ class FunnelForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index d4efd7ba0a..0f8e76b99f 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -1104,7 +1104,7 @@ class TFFunnelForPreTrainingOutput(ModelOutput): heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/fuyu/modeling_fuyu.py b/src/transformers/models/fuyu/modeling_fuyu.py index 3f6d31aab3..fd19ff7b8d 100644 --- a/src/transformers/models/fuyu/modeling_fuyu.py +++ b/src/transformers/models/fuyu/modeling_fuyu.py @@ -227,9 +227,11 @@ class FuyuForCausalLM(FuyuPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - image_patches: torch.Tensor = None, # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] - image_patches_indices: torch.Tensor = None, + input_ids: Optional[torch.LongTensor] = None, + image_patches: Optional[ + torch.Tensor + ] = None, # [batch_size, num_total_patches, patch_size_ x patch_size x num_channels ] + image_patches_indices: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index 31df29e6a0..8fdcd613c2 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -517,7 +517,7 @@ class GemmaModel(GemmaPreTrainedModel): @add_start_docstrings_to_model_forward(GEMMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -794,7 +794,7 @@ class GemmaForCausalLM(GemmaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/gemma/modular_gemma.py b/src/transformers/models/gemma/modular_gemma.py index 50b1d33dcc..735c508f6d 100644 --- a/src/transformers/models/gemma/modular_gemma.py +++ b/src/transformers/models/gemma/modular_gemma.py @@ -369,7 +369,7 @@ class GemmaMLP(LlamaMLP): class GemmaModel(LlamaModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/gemma2/modeling_gemma2.py b/src/transformers/models/gemma2/modeling_gemma2.py index 556849d0bc..38e78666f5 100644 --- a/src/transformers/models/gemma2/modeling_gemma2.py +++ b/src/transformers/models/gemma2/modeling_gemma2.py @@ -562,7 +562,7 @@ class Gemma2Model(Gemma2PreTrainedModel): @add_start_docstrings_to_model_forward(GEMMA2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, @@ -822,7 +822,7 @@ class Gemma2ForCausalLM(Gemma2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, diff --git a/src/transformers/models/gemma2/modular_gemma2.py b/src/transformers/models/gemma2/modular_gemma2.py index 4f8d2e1ba4..d197d978e8 100644 --- a/src/transformers/models/gemma2/modular_gemma2.py +++ b/src/transformers/models/gemma2/modular_gemma2.py @@ -404,7 +404,7 @@ class Gemma2Model(GemmaModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, @@ -576,7 +576,7 @@ class Gemma2ForCausalLM(GemmaForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, diff --git a/src/transformers/models/gemma3/image_processing_gemma3.py b/src/transformers/models/gemma3/image_processing_gemma3.py index e8d6e87243..f9156ab1b6 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3.py +++ b/src/transformers/models/gemma3/image_processing_gemma3.py @@ -102,11 +102,11 @@ class Gemma3ImageProcessor(BaseImageProcessor): do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, - do_pan_and_scan: bool = None, + do_convert_rgb: Optional[bool] = None, + do_pan_and_scan: Optional[bool] = None, pan_and_scan_min_crop_size: Optional[int] = None, pan_and_scan_max_num_crops: Optional[int] = None, - pan_and_scan_min_ratio_to_activate: float = None, + pan_and_scan_min_ratio_to_activate: Optional[float] = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -240,22 +240,22 @@ class Gemma3ImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, - do_convert_rgb: bool = None, - do_pan_and_scan: bool = None, + do_convert_rgb: Optional[bool] = None, + do_pan_and_scan: Optional[bool] = None, pan_and_scan_min_crop_size: Optional[int] = None, pan_and_scan_max_num_crops: Optional[int] = None, - pan_and_scan_min_ratio_to_activate: float = None, + pan_and_scan_min_ratio_to_activate: Optional[float] = None, ) -> PIL.Image.Image: """ Preprocess an image or batch of images. diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py index 1078a01d93..6dad88c1bc 100644 --- a/src/transformers/models/gemma3/modeling_gemma3.py +++ b/src/transformers/models/gemma3/modeling_gemma3.py @@ -86,7 +86,7 @@ class Gemma3CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -913,7 +913,7 @@ class Gemma3ForCausalLM(Gemma3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridCache] = None, @@ -1223,8 +1223,8 @@ class Gemma3ForConditionalGeneration(Gemma3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=Gemma3CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py index 001bbd8f19..c4de8d928d 100644 --- a/src/transformers/models/gemma3/modular_gemma3.py +++ b/src/transformers/models/gemma3/modular_gemma3.py @@ -333,7 +333,7 @@ class Gemma3CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -854,8 +854,8 @@ class Gemma3ForConditionalGeneration(PaliGemmaForConditionalGeneration): @replace_return_docstrings(output_type=Gemma3CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py index 0d37d04e81..7efdf2d45c 100644 --- a/src/transformers/models/git/modeling_git.py +++ b/src/transformers/models/git/modeling_git.py @@ -77,7 +77,7 @@ class GitVisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/glm/modeling_glm.py b/src/transformers/models/glm/modeling_glm.py index 7d77f3f2f1..5b40a8addc 100644 --- a/src/transformers/models/glm/modeling_glm.py +++ b/src/transformers/models/glm/modeling_glm.py @@ -533,7 +533,7 @@ class GlmModel(GlmPreTrainedModel): @add_start_docstrings_to_model_forward(GLM_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -808,7 +808,7 @@ class GlmForCausalLM(GlmPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py index d052f4a543..875c0742b9 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py @@ -266,7 +266,7 @@ class GotOcr2ImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> PIL.Image.Image: diff --git a/src/transformers/models/got_ocr2/modeling_got_ocr2.py b/src/transformers/models/got_ocr2/modeling_got_ocr2.py index 83fa36f0e1..1278af4faa 100644 --- a/src/transformers/models/got_ocr2/modeling_got_ocr2.py +++ b/src/transformers/models/got_ocr2/modeling_got_ocr2.py @@ -320,7 +320,7 @@ class GotOcr2VisionEncoderOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -550,7 +550,7 @@ class GotOcr2CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -741,8 +741,8 @@ class GotOcr2ForConditionalGeneration(GotOcr2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=GotOcr2CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/got_ocr2/modular_got_ocr2.py b/src/transformers/models/got_ocr2/modular_got_ocr2.py index aed41cc285..36d2db007b 100644 --- a/src/transformers/models/got_ocr2/modular_got_ocr2.py +++ b/src/transformers/models/got_ocr2/modular_got_ocr2.py @@ -385,8 +385,8 @@ class GotOcr2ForConditionalGeneration(LlavaForConditionalGeneration): @replace_return_docstrings(output_type=GotOcr2CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 1af1366925..75c148f233 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -532,8 +532,8 @@ class GPT2DoubleHeadsModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None mc_loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - mc_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + mc_logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 41eb5c19ef..5812e42af7 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -628,8 +628,8 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput): heads. """ - logits: tf.Tensor = None - mc_logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None + mc_logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/gpt2/tokenization_gpt2_tf.py b/src/transformers/models/gpt2/tokenization_gpt2_tf.py index 0c0fdb3ae8..34e6ca2d25 100644 --- a/src/transformers/models/gpt2/tokenization_gpt2_tf.py +++ b/src/transformers/models/gpt2/tokenization_gpt2_tf.py @@ -1,5 +1,5 @@ import os -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union import tensorflow as tf from keras_nlp.tokenizers import BytePairTokenizer @@ -25,7 +25,13 @@ class TFGPT2Tokenizer(keras.layers.Layer): merges (List[str]): Merges list for Byte Pair Tokenizer """ - def __init__(self, vocab: Dict[str, int], merges: List[str], max_length: int = None, pad_token_id: int = None): + def __init__( + self, + vocab: Dict[str, int], + merges: List[str], + max_length: Optional[int] = None, + pad_token_id: Optional[int] = None, + ): super().__init__() self.pad_token_id = pad_token_id self.max_length = max_length @@ -88,7 +94,7 @@ class TFGPT2Tokenizer(keras.layers.Layer): "pad_token_id": self.pad_token_id, } - def call(self, x, max_length: int = None): + def call(self, x, max_length: Optional[int] = None): input_ids = self.tf_tokenizer(x) attention_mask = tf.ones_like(input_ids) diff --git a/src/transformers/models/granite/modeling_granite.py b/src/transformers/models/granite/modeling_granite.py index 74bb0d054f..4345036ef1 100644 --- a/src/transformers/models/granite/modeling_granite.py +++ b/src/transformers/models/granite/modeling_granite.py @@ -534,7 +534,7 @@ class GraniteModel(GranitePreTrainedModel): @add_start_docstrings_to_model_forward(GRANITE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -807,7 +807,7 @@ class GraniteForCausalLM(GranitePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/granite/modular_granite.py b/src/transformers/models/granite/modular_granite.py index f6d99e1c30..494ab5f182 100644 --- a/src/transformers/models/granite/modular_granite.py +++ b/src/transformers/models/granite/modular_granite.py @@ -122,7 +122,7 @@ class GraniteModel(LlamaModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -232,7 +232,7 @@ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ... class GraniteForCausalLM(LlamaForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/granitemoe/modeling_granitemoe.py b/src/transformers/models/granitemoe/modeling_granitemoe.py index 39441473cb..f2d4fc7f9a 100644 --- a/src/transformers/models/granitemoe/modeling_granitemoe.py +++ b/src/transformers/models/granitemoe/modeling_granitemoe.py @@ -989,7 +989,7 @@ class GraniteMoeModel(GraniteMoePreTrainedModel): @add_start_docstrings_to_model_forward(GRANITEMOE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1285,7 +1285,7 @@ class GraniteMoeForCausalLM(GraniteMoePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py index 29d1b598f4..a47dd45ba7 100644 --- a/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py +++ b/src/transformers/models/granitemoeshared/modeling_granitemoeshared.py @@ -935,7 +935,7 @@ class GraniteMoeSharedModel(GraniteMoeSharedPreTrainedModel): @add_start_docstrings_to_model_forward(GRANITEMOESHARED_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1311,7 +1311,7 @@ class GraniteMoeSharedForCausalLM(GraniteMoeSharedPreTrainedModel, GenerationMix @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py index dcf6fda897..03a6c2e4e3 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py @@ -986,7 +986,7 @@ class GroundingDinoImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -1302,7 +1302,7 @@ class GroundingDinoImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 0b3b2899c1..a238c1dc1d 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -132,9 +132,9 @@ class GroundingDinoDecoderOutput(ModelOutput): weighted average in the self-attention, cross-attention and multi-scale deformable attention heads. """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None @@ -166,8 +166,8 @@ class GroundingDinoEncoderOutput(ModelOutput): multi-scale deformable attention heads. """ - last_hidden_state_vision: torch.FloatTensor = None - last_hidden_state_text: torch.FloatTensor = None + last_hidden_state_vision: Optional[torch.FloatTensor] = None + last_hidden_state_text: Optional[torch.FloatTensor] = None vision_hidden_states: Optional[Tuple[torch.FloatTensor]] = None text_hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None @@ -225,10 +225,10 @@ class GroundingDinoModelOutput(ModelOutput): Coordinates of top `config.num_queries` scoring bounding boxes in the first stage. """ - last_hidden_state: torch.FloatTensor = None - init_reference_points: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + init_reference_points: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None encoder_last_hidden_state_vision: Optional[torch.FloatTensor] = None @@ -314,8 +314,8 @@ class GroundingDinoObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None init_reference_points: Optional[torch.FloatTensor] = None @@ -1012,7 +1012,7 @@ class GroundingDinoDeformableLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, reference_points=None, spatial_shapes=None, spatial_shapes_list=None, @@ -2547,8 +2547,8 @@ class GroundingDinoForObjectDetection(GroundingDinoPreTrainedModel): self, pixel_values: torch.FloatTensor, input_ids: torch.LongTensor, - token_type_ids: torch.LongTensor = None, - attention_mask: torch.LongTensor = None, + token_type_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.LongTensor] = None, pixel_mask: Optional[torch.BoolTensor] = None, encoder_outputs: Optional[Union[GroundingDinoEncoderOutput, Tuple]] = None, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/groupvit/modeling_groupvit.py b/src/transformers/models/groupvit/modeling_groupvit.py index 889b200552..6a5c235cf8 100644 --- a/src/transformers/models/groupvit/modeling_groupvit.py +++ b/src/transformers/models/groupvit/modeling_groupvit.py @@ -302,11 +302,11 @@ class GroupViTModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - segmentation_logits: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + segmentation_logits: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 7c6b3d05f3..a6b62ae70c 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -253,11 +253,11 @@ class TFGroupViTModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits_per_image: tf.Tensor = None - logits_per_text: tf.Tensor = None - segmentation_logits: tf.Tensor = None - text_embeds: tf.Tensor = None - image_embeds: tf.Tensor = None + logits_per_image: Optional[tf.Tensor] = None + logits_per_text: Optional[tf.Tensor] = None + segmentation_logits: Optional[tf.Tensor] = None + text_embeds: Optional[tf.Tensor] = None + image_embeds: Optional[tf.Tensor] = None text_model_output: TFBaseModelOutputWithPooling = None vision_model_output: TFBaseModelOutputWithPooling = None @@ -646,9 +646,9 @@ class TFGroupViTTextEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, ) -> tf.Tensor: """ Applies embedding based on inputs tensor. @@ -898,10 +898,10 @@ class TFGroupViTAttention(keras.layers.Layer): def call( self, hidden_states: tf.Tensor, - attention_mask: tf.Tensor = None, - causal_attention_mask: tf.Tensor = None, - output_attentions: bool = None, - encoder_hidden_states: tf.Tensor = None, + attention_mask: Optional[tf.Tensor] = None, + causal_attention_mask: Optional[tf.Tensor] = None, + output_attentions: Optional[bool] = None, + encoder_hidden_states: Optional[tf.Tensor] = None, training: bool = False, ) -> Tuple[tf.Tensor]: """Input shape: Batch x Time x Channel""" diff --git a/src/transformers/models/helium/modeling_helium.py b/src/transformers/models/helium/modeling_helium.py index 6b786f656c..fdd38da56e 100644 --- a/src/transformers/models/helium/modeling_helium.py +++ b/src/transformers/models/helium/modeling_helium.py @@ -520,7 +520,7 @@ class HeliumModel(HeliumPreTrainedModel): @add_start_docstrings_to_model_forward(HELIUM_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -795,7 +795,7 @@ class HeliumForCausalLM(HeliumPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/hiera/modeling_hiera.py b/src/transformers/models/hiera/modeling_hiera.py index dd602e9f04..14a8dad524 100644 --- a/src/transformers/models/hiera/modeling_hiera.py +++ b/src/transformers/models/hiera/modeling_hiera.py @@ -85,7 +85,7 @@ class HieraEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -124,10 +124,10 @@ class HieraModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None bool_masked_pos: torch.BoolTensor = None - ids_restore: torch.LongTensor = None + ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -163,7 +163,7 @@ class HieraForImageClassificationOutput(ImageClassifierOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -198,9 +198,9 @@ class HieraForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None bool_masked_pos: torch.BoolTensor = None - ids_restore: torch.LongTensor = None + ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/idefics/modeling_idefics.py b/src/transformers/models/idefics/modeling_idefics.py index 9c21213b0b..02d5fb3c05 100644 --- a/src/transformers/models/idefics/modeling_idefics.py +++ b/src/transformers/models/idefics/modeling_idefics.py @@ -96,7 +96,7 @@ class IdeficsBaseModelOutputWithPast(ModelOutput): image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -138,7 +138,7 @@ class IdeficsCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1096,7 +1096,7 @@ class IdeficsModel(IdeficsPreTrainedModel): @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1556,7 +1556,7 @@ class IdeficsForVisionText2Text(IdeficsPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=IdeficsCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/idefics/modeling_tf_idefics.py b/src/transformers/models/idefics/modeling_tf_idefics.py index 8ca859f8cf..057988d992 100644 --- a/src/transformers/models/idefics/modeling_tf_idefics.py +++ b/src/transformers/models/idefics/modeling_tf_idefics.py @@ -91,7 +91,7 @@ class TFIdeficsBaseModelOutputWithPast(ModelOutput): image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None hidden_states: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[tf.Tensor]] = None @@ -133,7 +133,7 @@ class TFIdeficsCausalLMOutputWithPast(ModelOutput): """ loss: Optional[tf.Tensor] = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: Optional[List[tf.Tensor]] = None hidden_states: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[tf.Tensor]] = None diff --git a/src/transformers/models/idefics/vision.py b/src/transformers/models/idefics/vision.py index c01591b5a6..5e9f9b8ad7 100644 --- a/src/transformers/models/idefics/vision.py +++ b/src/transformers/models/idefics/vision.py @@ -55,7 +55,7 @@ class IdeficsVisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/idefics/vision_tf.py b/src/transformers/models/idefics/vision_tf.py index 7acfa01939..c01e1c2e1f 100644 --- a/src/transformers/models/idefics/vision_tf.py +++ b/src/transformers/models/idefics/vision_tf.py @@ -55,7 +55,7 @@ class TFIdeficsVisionModelOutput(ModelOutput): """ image_embeds: Optional[tf.Tensor] = None - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Optional[Tuple[tf.Tensor]] = None attentions: Optional[Tuple[tf.Tensor]] = None diff --git a/src/transformers/models/idefics2/modeling_idefics2.py b/src/transformers/models/idefics2/modeling_idefics2.py index 5ee9ec4cc7..16ff2873b1 100644 --- a/src/transformers/models/idefics2/modeling_idefics2.py +++ b/src/transformers/models/idefics2/modeling_idefics2.py @@ -76,7 +76,7 @@ class Idefics2BaseModelOutputWithPast(ModelOutput): image_hidden_states of the model produced by the vision encoder, and optionally by the perceiver """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -114,7 +114,7 @@ class Idefics2CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1101,7 +1101,7 @@ class Idefics2Model(Idefics2PreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1294,7 +1294,7 @@ class Idefics2ForConditionalGeneration(Idefics2PreTrainedModel, GenerationMixin) @replace_return_docstrings(output_type=Idefics2CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/idefics3/modeling_idefics3.py b/src/transformers/models/idefics3/modeling_idefics3.py index 3821bd3e7a..64193c2a5d 100644 --- a/src/transformers/models/idefics3/modeling_idefics3.py +++ b/src/transformers/models/idefics3/modeling_idefics3.py @@ -75,7 +75,7 @@ class Idefics3BaseModelOutputWithPast(ModelOutput): image_hidden_states of the model produced by the vision encoder """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -113,7 +113,7 @@ class Idefics3CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -833,7 +833,7 @@ class Idefics3Model(Idefics3PreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1016,7 +1016,7 @@ class Idefics3ForConditionalGeneration(Idefics3PreTrainedModel, GenerationMixin) @replace_return_docstrings(output_type=Idefics3CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index 357baf70d6..07e7604574 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -177,10 +177,10 @@ class ImageGPTImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_normalize: bool = None, + do_normalize: Optional[bool] = None, do_color_quantize: Optional[bool] = None, clusters: Optional[Union[List[List[int]], np.ndarray]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/informer/modeling_informer.py b/src/transformers/models/informer/modeling_informer.py index 3f37662459..a67950233f 100644 --- a/src/transformers/models/informer/modeling_informer.py +++ b/src/transformers/models/informer/modeling_informer.py @@ -178,7 +178,7 @@ class InformerNOPScaler(nn.Module): self.keepdim = config.keepdim if hasattr(config, "keepdim") else True def forward( - self, data: torch.Tensor, observed_indicator: torch.Tensor = None + self, data: torch.Tensor, observed_indicator: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Parameters: diff --git a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py index 37cec22a9b..6c9bf4d4d3 100644 --- a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py @@ -172,7 +172,7 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> BatchFeature: @@ -290,7 +290,7 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor): do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/jamba/modeling_jamba.py b/src/transformers/models/jamba/modeling_jamba.py index 4e4e5f71bf..9a797c81d9 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -1233,7 +1233,7 @@ class JambaModel(JambaPreTrainedModel): @add_start_docstrings_to_model_forward(JAMBA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, @@ -1432,7 +1432,7 @@ class JambaForCausalLM(JambaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[HybridMambaAttentionDynamicCache] = None, diff --git a/src/transformers/models/jetmoe/modeling_jetmoe.py b/src/transformers/models/jetmoe/modeling_jetmoe.py index ae2bb44bf9..1b87473f5f 100644 --- a/src/transformers/models/jetmoe/modeling_jetmoe.py +++ b/src/transformers/models/jetmoe/modeling_jetmoe.py @@ -989,7 +989,7 @@ class JetMoeModel(JetMoePreTrainedModel): @add_start_docstrings_to_model_forward(JETMOE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1295,7 +1295,7 @@ class JetMoeForCausalLM(JetMoePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/kosmos2/modeling_kosmos2.py b/src/transformers/models/kosmos2/modeling_kosmos2.py index b74f060ced..23a1391fa1 100644 --- a/src/transformers/models/kosmos2/modeling_kosmos2.py +++ b/src/transformers/models/kosmos2/modeling_kosmos2.py @@ -309,7 +309,7 @@ class Kosmos2ModelOutput(ModelOutput): input) to speed up sequential decoding. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -367,7 +367,7 @@ class Kosmos2ForConditionalGenerationModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -837,10 +837,10 @@ class Kosmos2TextSinusoidalPositionalEmbedding(nn.Module): @torch.no_grad() def forward( self, - input_ids: torch.Tensor = None, - inputs_embeds: torch.Tensor = None, + input_ids: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, past_key_values_length: int = 0, - position_ids: torch.Tensor = None, + position_ids: Optional[torch.Tensor] = None, ): if input_ids is not None: bsz, seq_len = input_ids.size() @@ -1187,11 +1187,11 @@ class Kosmos2TextTransformer(nn.Module): def forward_embedding( self, input_ids, - inputs_embeds: torch.Tensor = None, - image_embeds: torch.Tensor = None, - img_input_mask: torch.Tensor = None, + inputs_embeds: Optional[torch.Tensor] = None, + image_embeds: Optional[torch.Tensor] = None, + img_input_mask: Optional[torch.Tensor] = None, past_key_values_length: int = 0, - position_ids: torch.Tensor = None, + position_ids: Optional[torch.Tensor] = None, ): # The argument `inputs_embeds` should be the one without being multiplied by `self.embed_scale`. if inputs_embeds is None: diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 43215f6157..c17d0f1ec5 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -127,11 +127,11 @@ class TFLayoutLMEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - bbox: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + bbox: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, training: bool = False, ) -> tf.Tensor: """ diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index 4f326e36e2..aa9c737bfa 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -198,10 +198,10 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - apply_ocr: bool = None, + apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py index 2a2dda439e..d324c1ac7d 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py @@ -561,7 +561,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -621,7 +621,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -675,7 +675,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): def _batch_prepare_for_model( self, batch_text_or_text_pairs, - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[int]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py index 0b974c283d..5d36e9fd27 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py @@ -312,7 +312,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -449,7 +449,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index 61c73d38b3..246e9dcf1f 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -225,15 +225,15 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample=None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Union[float, Iterable[float]] = None, image_std: Union[float, Iterable[float]] = None, - apply_ocr: bool = None, + apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index c0762afb49..4cdd15d5e4 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -231,7 +231,7 @@ class TFLayoutLMv3TextEmbeddings(keras.layers.Layer): def call( self, input_ids: tf.Tensor | None = None, - bbox: tf.Tensor = None, + bbox: Optional[tf.Tensor] = None, token_type_ids: tf.Tensor | None = None, position_ids: tf.Tensor | None = None, inputs_embeds: tf.Tensor | None = None, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py index 12114ac339..b88f7b4c1b 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py @@ -691,7 +691,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -752,7 +752,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -807,7 +807,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): def _batch_prepare_for_model( self, batch_text_or_text_pairs, - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[int]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py index 50875e2341..737a50df9f 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py @@ -357,7 +357,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -496,7 +496,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index 1c94bfbd25..f72039c884 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -593,7 +593,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -647,7 +647,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): def _batch_prepare_for_model( self, batch_text_or_text_pairs, - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[int]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index d45d4d988b..4c16642c57 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -436,7 +436,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 67eb44503f..9f80075338 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -1247,7 +1247,7 @@ class LEDSeq2SeqModelOutput(ModelOutput): in the sequence. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -1314,7 +1314,7 @@ class LEDSeq2SeqLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -1381,7 +1381,7 @@ class LEDSeq2SeqSequenceClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -1450,8 +1450,8 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index fe6c4a8986..25f7a2e5f5 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1470,7 +1470,7 @@ class TFLEDEncoderBaseModelOutput(ModelOutput): in the sequence. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -1533,7 +1533,7 @@ class TFLEDSeq2SeqModelOutput(ModelOutput): in the sequence. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None decoder_attentions: Tuple[tf.Tensor, ...] | None = None @@ -1600,7 +1600,7 @@ class TFLEDSeq2SeqLMOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None decoder_attentions: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/levit/modeling_levit.py b/src/transformers/models/levit/modeling_levit.py index e1825f7a36..9924ac25de 100644 --- a/src/transformers/models/levit/modeling_levit.py +++ b/src/transformers/models/levit/modeling_levit.py @@ -68,9 +68,9 @@ class LevitForImageClassificationWithTeacherOutput(ModelOutput): plus the initial embedding outputs. """ - logits: torch.FloatTensor = None - cls_logits: torch.FloatTensor = None - distillation_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + cls_logits: Optional[torch.FloatTensor] = None + distillation_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -551,7 +551,7 @@ class LevitModel(LevitPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]: @@ -618,7 +618,7 @@ class LevitForImageClassification(LevitPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -710,7 +710,7 @@ class LevitForImageClassificationWithTeacher(LevitPreTrainedModel): ) def forward( self, - pixel_values: torch.FloatTensor = None, + pixel_values: Optional[torch.FloatTensor] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, LevitForImageClassificationWithTeacherOutput]: diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index d64dd9b7b6..54a2981f6d 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -522,7 +522,7 @@ class LlamaModel(LlamaPreTrainedModel): @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -797,7 +797,7 @@ class LlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/llava/image_processing_llava.py b/src/transformers/models/llava/image_processing_llava.py index c78d1c2867..37ef079c91 100644 --- a/src/transformers/models/llava/image_processing_llava.py +++ b/src/transformers/models/llava/image_processing_llava.py @@ -279,7 +279,7 @@ class LlavaImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_pad: bool = None, + do_pad: Optional[bool] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, resample: Optional[PILImageResampling] = None, diff --git a/src/transformers/models/llava/modeling_llava.py b/src/transformers/models/llava/modeling_llava.py index b20ecf2ca9..c1d075b641 100644 --- a/src/transformers/models/llava/modeling_llava.py +++ b/src/transformers/models/llava/modeling_llava.py @@ -78,7 +78,7 @@ class LlavaCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -329,8 +329,8 @@ class LlavaForConditionalGeneration(LlavaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=LlavaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -344,7 +344,7 @@ class LlavaForConditionalGeneration(LlavaPreTrainedModel, GenerationMixin): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **lm_kwargs, ) -> Union[Tuple, LlavaCausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index ff8b36c452..c212a549fc 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -329,14 +329,14 @@ class LlavaNextImageProcessor(BaseImageProcessor): def _preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, @@ -558,19 +558,19 @@ class LlavaNextImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, image_grid_pinpoints: List = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/llava_next/modeling_llava_next.py b/src/transformers/models/llava_next/modeling_llava_next.py index 1b0b4b93c8..06fc6bfedb 100644 --- a/src/transformers/models/llava_next/modeling_llava_next.py +++ b/src/transformers/models/llava_next/modeling_llava_next.py @@ -185,7 +185,7 @@ class LlavaNextCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -542,8 +542,8 @@ class LlavaNextForConditionalGeneration(LlavaNextPreTrainedModel, GenerationMixi @replace_return_docstrings(output_type=LlavaNextCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py index 9aa09e9673..139852324a 100644 --- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py @@ -179,17 +179,17 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): def _preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> list[np.ndarray]: @@ -279,17 +279,17 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): def preprocess( self, images: VideoInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/llava_next_video/modeling_llava_next_video.py b/src/transformers/models/llava_next_video/modeling_llava_next_video.py index 6d86d9c4d4..bf30ff17c0 100644 --- a/src/transformers/models/llava_next_video/modeling_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modeling_llava_next_video.py @@ -86,7 +86,7 @@ class LlavaNextVideoCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -581,9 +581,9 @@ class LlavaNextVideoForConditionalGeneration(LlavaNextVideoPreTrainedModel, Gene @replace_return_docstrings(output_type=LlavaNextVideoCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_next_video/modular_llava_next_video.py b/src/transformers/models/llava_next_video/modular_llava_next_video.py index 804f6f5835..8168682ad7 100644 --- a/src/transformers/models/llava_next_video/modular_llava_next_video.py +++ b/src/transformers/models/llava_next_video/modular_llava_next_video.py @@ -340,9 +340,9 @@ class LlavaNextVideoForConditionalGeneration(LlavaNextForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py index a7408ca4dd..23e03483f2 100644 --- a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py @@ -453,15 +453,15 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): def _preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Image.Image: @@ -528,17 +528,17 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, image_grid_pinpoints: List = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/llava_onevision/modeling_llava_onevision.py b/src/transformers/models/llava_onevision/modeling_llava_onevision.py index 5d41f8489e..31d5b9edb6 100644 --- a/src/transformers/models/llava_onevision/modeling_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modeling_llava_onevision.py @@ -191,7 +191,7 @@ class LlavaOnevisionCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -601,10 +601,10 @@ class LlavaOnevisionForConditionalGeneration(LlavaOnevisionPreTrainedModel, Gene @add_start_docstrings(LLAVA_ONEVISION_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, image_sizes: Optional[torch.LongTensor] = None, - pixel_values_videos: torch.FloatTensor = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, image_sizes_videos: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py index 743e9f2df6..14307470e4 100644 --- a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py @@ -109,15 +109,15 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): def _preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> list[np.ndarray]: @@ -200,15 +200,15 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): def preprocess( self, videos: VideoInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index ca87b37c65..9eefa02a66 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -128,7 +128,7 @@ class LongformerBaseModelOutputWithPooling(ModelOutput): """ last_hidden_state: torch.FloatTensor - pooler_output: torch.FloatTensor = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -174,7 +174,7 @@ class LongformerMaskedLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -222,8 +222,8 @@ class LongformerQuestionAnsweringModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -269,7 +269,7 @@ class LongformerSequenceClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -317,7 +317,7 @@ class LongformerMultipleChoiceModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -363,7 +363,7 @@ class LongformerTokenClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 41eb0ae34b..9280838de0 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -93,7 +93,7 @@ class TFLongformerBaseModelOutput(ModelOutput): in the sequence. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -140,8 +140,8 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput): in the sequence. """ - last_hidden_state: tf.Tensor = None - pooler_output: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + pooler_output: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -187,7 +187,7 @@ class TFLongformerMaskedLMOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -235,8 +235,8 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - start_logits: tf.Tensor = None - end_logits: tf.Tensor = None + start_logits: Optional[tf.Tensor] = None + end_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -282,7 +282,7 @@ class TFLongformerSequenceClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -330,7 +330,7 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None @@ -376,7 +376,7 @@ class TFLongformerTokenClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None global_attentions: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py index 4665ff0f0e..7bc27bcd9c 100644 --- a/src/transformers/models/luke/modeling_luke.py +++ b/src/transformers/models/luke/modeling_luke.py @@ -71,7 +71,7 @@ class BaseLukeModelOutputWithPooling(BaseModelOutputWithPooling): compute the weighted average in the self-attention heads. """ - entity_last_hidden_state: torch.FloatTensor = None + entity_last_hidden_state: Optional[torch.FloatTensor] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -102,7 +102,7 @@ class BaseLukeModelOutput(BaseModelOutput): heads. """ - entity_last_hidden_state: torch.FloatTensor = None + entity_last_hidden_state: Optional[torch.FloatTensor] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -142,8 +142,8 @@ class LukeMaskedLMOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None mlm_loss: Optional[torch.FloatTensor] = None mep_loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - entity_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + entity_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -174,7 +174,7 @@ class EntityClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -205,7 +205,7 @@ class EntityPairClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -236,7 +236,7 @@ class EntitySpanClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -270,7 +270,7 @@ class LukeSequenceClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -304,7 +304,7 @@ class LukeTokenClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -340,8 +340,8 @@ class LukeQuestionAnsweringModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -377,7 +377,7 @@ class LukeMultipleChoiceModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -472,7 +472,10 @@ class LukeEntityEmbeddings(nn.Module): self.dropout = nn.Dropout(config.hidden_dropout_prob) def forward( - self, entity_ids: torch.LongTensor, position_ids: torch.LongTensor, token_type_ids: torch.LongTensor = None + self, + entity_ids: torch.LongTensor, + position_ids: torch.LongTensor, + token_type_ids: Optional[torch.LongTensor] = None, ): if token_type_ids is None: token_type_ids = torch.zeros_like(entity_ids) diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index cba5ead876..c4cbc21922 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -143,7 +143,10 @@ class M2M100SinusoidalPositionalEmbedding(nn.Module): @torch.no_grad() def forward( - self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0 + self, + input_ids: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, + past_key_values_length: int = 0, ): if input_ids is not None: bsz, seq_len = input_ids.size() diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index 6d69e21213..affb6b8b67 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -658,7 +658,7 @@ class MarianEncoder(MarianPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, head_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -824,7 +824,7 @@ class MarianDecoder(MarianPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1124,7 +1124,7 @@ class MarianModel(MarianPreTrainedModel): @replace_return_docstrings(output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.Tensor] = None, @@ -1360,7 +1360,7 @@ class MarianMTModel(MarianPreTrainedModel, GenerationMixin): @add_end_docstrings(MARIAN_GENERATION_EXAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.Tensor] = None, @@ -1504,7 +1504,7 @@ class MarianForCausalLM(MarianPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py index 814b6a2ab6..26ba704150 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm.py +++ b/src/transformers/models/markuplm/tokenization_markuplm.py @@ -646,7 +646,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, xpaths: Optional[List[List[List[int]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -706,7 +706,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, xpaths: Optional[List[List[List[int]]]] = None, node_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -760,7 +760,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): def _batch_prepare_for_model( self, batch_text_or_text_pairs, - is_pair: bool = None, + is_pair: Optional[bool] = None, xpaths: Optional[List[List[int]]] = None, node_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index 49c3bfd034..55d75e3541 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -421,7 +421,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, xpaths: Optional[List[List[List[int]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, @@ -558,7 +558,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, xpaths: Optional[List[List[List[int]]]] = None, node_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index 608d2dacb5..37281afeca 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -160,7 +160,7 @@ class Mask2FormerConfig(PretrainedConfig): init_xavier_std: float = 1.0, use_auxiliary_loss: bool = True, feature_strides: List[int] = [4, 8, 16, 32], - output_auxiliary_logits: bool = None, + output_auxiliary_logits: Optional[bool] = None, backbone: Optional[str] = None, use_pretrained_backbone: bool = False, use_timm_backbone: bool = False, diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index b8ab958e61..5c61431bf0 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -575,13 +575,13 @@ class Mask2FormerImageProcessor(BaseImageProcessor): def _preprocess( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -599,13 +599,13 @@ class Mask2FormerImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -641,7 +641,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): def _preprocess_mask( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: int = 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 6d35d69f5f..e4fba109a0 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -76,7 +76,7 @@ class Mask2FormerPixelDecoderOutput(ModelOutput): """ multi_scale_features: Tuple[torch.FloatTensor] = None - mask_features: torch.FloatTensor = None + mask_features: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -105,7 +105,7 @@ class Mask2FormerMaskedAttentionDecoderOutput(BaseModelOutputWithCrossAttentions layernorm. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[torch.FloatTensor] = None masks_queries_logits: Tuple[torch.FloatTensor] = None @@ -137,9 +137,9 @@ class Mask2FormerPixelLevelModuleOutput(ModelOutput): called feature maps) of the model at the output of each stage. """ - encoder_last_hidden_state: torch.FloatTensor = None + encoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - decoder_last_hidden_state: torch.FloatTensor = None + decoder_last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Tuple[torch.FloatTensor] = None @@ -178,9 +178,9 @@ class Mask2FormerModelOutput(ModelOutput): sequence_length)`. Self attentions weights from transformer decoder. """ - encoder_last_hidden_state: torch.FloatTensor = None - pixel_decoder_last_hidden_state: torch.FloatTensor = None - transformer_decoder_last_hidden_state: torch.FloatTensor = None + encoder_last_hidden_state: Optional[torch.FloatTensor] = None + pixel_decoder_last_hidden_state: Optional[torch.FloatTensor] = None + transformer_decoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None pixel_decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None transformer_decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -234,12 +234,12 @@ class Mask2FormerForUniversalSegmentationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - class_queries_logits: torch.FloatTensor = None - masks_queries_logits: torch.FloatTensor = None + class_queries_logits: Optional[torch.FloatTensor] = None + masks_queries_logits: Optional[torch.FloatTensor] = None auxiliary_logits: Optional[List[Dict[str, torch.FloatTensor]]] = None - encoder_last_hidden_state: torch.FloatTensor = None - pixel_decoder_last_hidden_state: torch.FloatTensor = None - transformer_decoder_last_hidden_state: torch.FloatTensor = None + encoder_last_hidden_state: Optional[torch.FloatTensor] = None + pixel_decoder_last_hidden_state: Optional[torch.FloatTensor] = None + transformer_decoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None pixel_decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None transformer_decoder_hidden_states: Optional[torch.FloatTensor] = None @@ -1004,7 +1004,7 @@ class Mask2FormerPixelDecoderEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, reference_points=None, spatial_shapes_list=None, level_start_index=None, @@ -1801,11 +1801,11 @@ class Mask2FormerMaskedAttentionDecoder(nn.Module): def forward( self, - inputs_embeds: torch.Tensor = None, - multi_stage_positional_embeddings: torch.Tensor = None, - pixel_embeddings: torch.Tensor = None, - encoder_hidden_states: torch.Tensor = None, - query_position_embeddings: torch.Tensor = None, + inputs_embeds: Optional[torch.Tensor] = None, + multi_stage_positional_embeddings: Optional[torch.Tensor] = None, + pixel_embeddings: Optional[torch.Tensor] = None, + encoder_hidden_states: Optional[torch.Tensor] = None, + query_position_embeddings: Optional[torch.Tensor] = None, feature_size_list: List = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 532bbaffdd..b31d032188 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -576,13 +576,13 @@ class MaskFormerImageProcessor(BaseImageProcessor): def _preprocess( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -600,13 +600,13 @@ class MaskFormerImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -642,7 +642,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): def _preprocess_mask( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: int = 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index b29672d7de..5c1873b4d6 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -140,7 +140,7 @@ class MaskFormerPixelDecoderOutput(ModelOutput): weighted average in the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -235,9 +235,9 @@ class MaskFormerForInstanceSegmentationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - class_queries_logits: torch.FloatTensor = None - masks_queries_logits: torch.FloatTensor = None - auxiliary_logits: torch.FloatTensor = None + class_queries_logits: Optional[torch.FloatTensor] = None + masks_queries_logits: Optional[torch.FloatTensor] = None + auxiliary_logits: Optional[torch.FloatTensor] = None encoder_last_hidden_state: Optional[torch.FloatTensor] = None pixel_decoder_last_hidden_state: Optional[torch.FloatTensor] = None transformer_decoder_last_hidden_state: Optional[torch.FloatTensor] = None diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py index 4a8d0b002c..dd3fc11ca1 100644 --- a/src/transformers/models/maskformer/modeling_maskformer_swin.py +++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py @@ -61,8 +61,8 @@ class MaskFormerSwinModelOutputWithPooling(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None - pooler_output: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None hidden_states_spatial_dimensions: Tuple[Tuple[int, int]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -93,7 +93,7 @@ class MaskFormerSwinBaseModelOutput(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None hidden_states_spatial_dimensions: Tuple[Tuple[int, int]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 522c5c8cfc..850fde60d1 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -967,7 +967,7 @@ class MBartEncoder(MBartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -1153,7 +1153,7 @@ class MBartDecoder(MBartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1442,7 +1442,7 @@ class MBartModel(MBartPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1570,7 +1570,7 @@ class MBartForConditionalGeneration(MBartPreTrainedModel, GenerationMixin): @add_end_docstrings(MBART_GENERATION_EXAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1692,7 +1692,7 @@ class MBartForSequenceClassification(MBartPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1820,7 +1820,7 @@ class MBartForQuestionAnswering(MBartPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward def forward( self, - input_ids: torch.Tensor = None, + input_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1965,7 +1965,7 @@ class MBartForCausalLM(MBartPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/megatron_bert/modeling_megatron_bert.py b/src/transformers/models/megatron_bert/modeling_megatron_bert.py index dba31a7b85..82f2202f47 100755 --- a/src/transformers/models/megatron_bert/modeling_megatron_bert.py +++ b/src/transformers/models/megatron_bert/modeling_megatron_bert.py @@ -751,8 +751,8 @@ class MegatronBertForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/mimi/modeling_mimi.py b/src/transformers/models/mimi/modeling_mimi.py index b76cebc188..858844ad7d 100644 --- a/src/transformers/models/mimi/modeling_mimi.py +++ b/src/transformers/models/mimi/modeling_mimi.py @@ -75,8 +75,8 @@ class MimiOutput(ModelOutput): have their past key value states given to this model). """ - audio_codes: torch.LongTensor = None - audio_values: torch.FloatTensor = None + audio_codes: Optional[torch.LongTensor] = None + audio_values: Optional[torch.FloatTensor] = None encoder_past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None decoder_past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None @@ -97,7 +97,7 @@ class MimiEncoderOutput(ModelOutput): have their past key value states given to this model). """ - audio_codes: torch.LongTensor = None + audio_codes: Optional[torch.LongTensor] = None encoder_past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None @@ -117,7 +117,7 @@ class MimiDecoderOutput(ModelOutput): have their past key value states given to this model). """ - audio_values: torch.FloatTensor = None + audio_values: Optional[torch.FloatTensor] = None decoder_past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None @@ -897,7 +897,7 @@ class MimiTransformerModel(nn.Module): def forward( self, - hidden_states: torch.LongTensor = None, + hidden_states: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1599,7 +1599,7 @@ class MimiModel(MimiPreTrainedModel): def encode( self, input_values: torch.Tensor, - padding_mask: torch.Tensor = None, + padding_mask: Optional[torch.Tensor] = None, num_quantizers: Optional[float] = None, encoder_past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index b22e92f6f6..bd7d38da28 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -487,7 +487,7 @@ class MistralModel(MistralPreTrainedModel): @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -786,7 +786,7 @@ class MistralForCausalLM(MistralPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/mistral/modeling_tf_mistral.py b/src/transformers/models/mistral/modeling_tf_mistral.py index 53ae7cec7e..e27453249b 100644 --- a/src/transformers/models/mistral/modeling_tf_mistral.py +++ b/src/transformers/models/mistral/modeling_tf_mistral.py @@ -528,7 +528,7 @@ class TFMistralMainLayer(keras.layers.Layer): @unpack_inputs def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: Optional[tf.Tensor] = None, position_ids: Optional[tf.Tensor] = None, past_key_values: Optional[List[tf.Tensor]] = None, @@ -770,7 +770,7 @@ class TFMistralModel(TFMistralPreTrainedModel): @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING) def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: Optional[tf.Tensor] = None, position_ids: Optional[tf.Tensor] = None, past_key_values: Optional[List[tf.Tensor]] = None, @@ -837,7 +837,7 @@ class TFMistralForCausalLM(TFMistralPreTrainedModel, TFCausalLanguageModelingLos @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: Optional[tf.Tensor] = None, position_ids: Optional[tf.Tensor] = None, past_key_values: Optional[List[tf.Tensor]] = None, @@ -962,7 +962,7 @@ class TFMistralForSequenceClassification(TFMistralPreTrainedModel, TFSequenceCla @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def call( self, - input_ids: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, attention_mask: Optional[tf.Tensor] = None, position_ids: Optional[tf.Tensor] = None, past_key_values: Optional[List[tf.Tensor]] = None, diff --git a/src/transformers/models/mistral3/modeling_mistral3.py b/src/transformers/models/mistral3/modeling_mistral3.py index 4ded5efed6..8ef1328466 100644 --- a/src/transformers/models/mistral3/modeling_mistral3.py +++ b/src/transformers/models/mistral3/modeling_mistral3.py @@ -160,7 +160,7 @@ class Mistral3CausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -379,8 +379,8 @@ class Mistral3ForConditionalGeneration(Mistral3PreTrainedModel, GenerationMixin) @replace_return_docstrings(output_type=Mistral3CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -393,7 +393,7 @@ class Mistral3ForConditionalGeneration(Mistral3PreTrainedModel, GenerationMixin) return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **lm_kwargs, ) -> Union[Tuple, Mistral3CausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/mistral3/modular_mistral3.py b/src/transformers/models/mistral3/modular_mistral3.py index 9d1edf97bd..3793bef183 100644 --- a/src/transformers/models/mistral3/modular_mistral3.py +++ b/src/transformers/models/mistral3/modular_mistral3.py @@ -139,8 +139,8 @@ class Mistral3ForConditionalGeneration(LlavaForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -153,7 +153,7 @@ class Mistral3ForConditionalGeneration(LlavaForConditionalGeneration): return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, logits_to_keep: Union[int, torch.Tensor] = 0, - image_sizes: torch.Tensor = None, + image_sizes: Optional[torch.Tensor] = None, **lm_kwargs, ) -> Union[Tuple, Mistral3CausalLMOutputWithPast]: r""" diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 973fff5f17..013f04ab36 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -609,7 +609,7 @@ class MixtralModel(MixtralPreTrainedModel): @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1000,7 +1000,7 @@ class MixtralForCausalLM(MixtralPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/mixtral/modular_mixtral.py b/src/transformers/models/mixtral/modular_mixtral.py index 3b470667e9..d94b581477 100644 --- a/src/transformers/models/mixtral/modular_mixtral.py +++ b/src/transformers/models/mixtral/modular_mixtral.py @@ -333,7 +333,7 @@ class MixtralModel(MistralModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -461,7 +461,7 @@ class MixtralForCausalLM(MistralForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/mllama/modeling_mllama.py b/src/transformers/models/mllama/modeling_mllama.py index 096db2d11b..763ac051f9 100644 --- a/src/transformers/models/mllama/modeling_mllama.py +++ b/src/transformers/models/mllama/modeling_mllama.py @@ -197,7 +197,7 @@ class MllamaVisionAttention(nn.Module): self, hidden_state: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - output_attentions: bool = None, + output_attentions: Optional[bool] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: query = self.q_proj(hidden_state) key = self.k_proj(hidden_state) @@ -237,7 +237,7 @@ class MllamaVisionSdpaAttention(MllamaVisionAttention): self, hidden_state: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - output_attentions: bool = None, + output_attentions: Optional[bool] = None, ) -> torch.Tensor: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. if output_attentions: @@ -302,7 +302,7 @@ class MllamaVisionEncoderLayer(nn.Module): self, hidden_state: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - output_attentions: bool = None, + output_attentions: Optional[bool] = None, ): # Self Attention residual = hidden_state @@ -469,7 +469,7 @@ class MllamaTextCrossAttention(nn.Module): past_key_value: Optional[Cache] = None, attention_mask: Optional[torch.Tensor] = None, output_attentions: bool = False, - use_cache: bool = None, + use_cache: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: """Input shape: Batch x Time x Channel""" @@ -537,7 +537,7 @@ class MllamaTextCrossSdpaAttention(MllamaTextCrossAttention): past_key_value: Optional[Cache] = None, attention_mask: Optional[torch.Tensor] = None, output_attentions: bool = False, - use_cache: bool = None, + use_cache: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: """Input shape: Batch x Time x Channel""" @@ -1894,7 +1894,7 @@ class MllamaForCausalLM(MllamaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="MllamaTextConfig") def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, cross_attention_states: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 11fde85cf6..9801e19ac3 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -734,8 +734,8 @@ class MobileBertForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 60815e0936..e85c079025 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -1063,8 +1063,8 @@ class TFMobileBertForPreTrainingOutput(ModelOutput): """ loss: tf.Tensor | None = None - prediction_logits: tf.Tensor = None - seq_relationship_logits: tf.Tensor = None + prediction_logits: Optional[tf.Tensor] = None + seq_relationship_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index f0092ca029..6c30c3413b 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -171,7 +171,7 @@ class MobileNetV1ImageProcessor(BaseImageProcessor): do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index c93f95a205..0107e96402 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -175,7 +175,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 3a32a79b27..f59c246627 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -220,14 +220,14 @@ class MobileViTImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_center_crop: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_flip_channel_order: bool = None, + do_flip_channel_order: Optional[bool] = None, data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: @@ -262,9 +262,9 @@ class MobileViTImageProcessor(BaseImageProcessor): def _preprocess_mask( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: @@ -302,14 +302,14 @@ class MobileViTImageProcessor(BaseImageProcessor): self, images: ImageInput, segmentation_maps: Optional[ImageInput] = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_center_crop: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_flip_channel_order: bool = None, + do_flip_channel_order: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/modernbert/modeling_modernbert.py b/src/transformers/models/modernbert/modeling_modernbert.py index 4f087ec382..3960b17f4f 100644 --- a/src/transformers/models/modernbert/modeling_modernbert.py +++ b/src/transformers/models/modernbert/modeling_modernbert.py @@ -214,7 +214,7 @@ class ModernBertEmbeddings(nn.Module): return self.drop(self.norm(self.tok_embeddings(input_ids))) def forward( - self, input_ids: torch.LongTensor = None, inputs_embeds: Optional[torch.Tensor] = None + self, input_ids: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.Tensor] = None ) -> torch.Tensor: if inputs_embeds is not None: hidden_states = self.drop(self.norm(inputs_embeds)) diff --git a/src/transformers/models/modernbert/modular_modernbert.py b/src/transformers/models/modernbert/modular_modernbert.py index 934931da3b..932d102598 100644 --- a/src/transformers/models/modernbert/modular_modernbert.py +++ b/src/transformers/models/modernbert/modular_modernbert.py @@ -477,7 +477,7 @@ class ModernBertEmbeddings(nn.Module): return self.drop(self.norm(self.tok_embeddings(input_ids))) def forward( - self, input_ids: torch.LongTensor = None, inputs_embeds: Optional[torch.Tensor] = None + self, input_ids: Optional[torch.LongTensor] = None, inputs_embeds: Optional[torch.Tensor] = None ) -> torch.Tensor: if inputs_embeds is not None: hidden_states = self.drop(self.norm(inputs_embeds)) diff --git a/src/transformers/models/moonshine/modeling_moonshine.py b/src/transformers/models/moonshine/modeling_moonshine.py index 040358ad46..09c48edbf2 100644 --- a/src/transformers/models/moonshine/modeling_moonshine.py +++ b/src/transformers/models/moonshine/modeling_moonshine.py @@ -841,7 +841,7 @@ class MoonshineDecoder(MoonshinePreTrainedModel): @add_start_docstrings_to_model_forward(MOONSHINE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/moonshine/modular_moonshine.py b/src/transformers/models/moonshine/modular_moonshine.py index 02938e73d5..9d6f2c52c5 100644 --- a/src/transformers/models/moonshine/modular_moonshine.py +++ b/src/transformers/models/moonshine/modular_moonshine.py @@ -734,7 +734,7 @@ class MoonshineDecoder(LlamaModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py index fcd4aea931..fcf49f7fc7 100644 --- a/src/transformers/models/moshi/modeling_moshi.py +++ b/src/transformers/models/moshi/modeling_moshi.py @@ -100,7 +100,7 @@ class MoshiConditionalGenerationGenerateOutput(ModelOutput): """ audio_sequences: Optional[torch.Tensor] = None - sequences: torch.LongTensor = None + sequences: Optional[torch.LongTensor] = None sequences_scores: Optional[torch.FloatTensor] = None scores: Optional[Tuple[torch.FloatTensor]] = None logits: Optional[Tuple[torch.FloatTensor]] = None @@ -143,8 +143,8 @@ class MoshiCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -193,13 +193,13 @@ class MoshiConditionalGenerationOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None depth_loss: Optional[torch.FloatTensor] = None - audio_logits: torch.FloatTensor = None + audio_logits: Optional[torch.FloatTensor] = None depth_past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None depth_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None depth_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -220,10 +220,10 @@ class MoshiUnconditionalInput(ModelOutput): 1]`: 1 for tokens that are **not masked**, 0 for tokens that are **masked**. """ - input_ids: torch.LongTensor = None - user_audio_codes: torch.Tensor = None - moshi_audio_codes: torch.Tensor = None - attention_mask: torch.LongTensor = None + input_ids: Optional[torch.LongTensor] = None + user_audio_codes: Optional[torch.Tensor] = None + moshi_audio_codes: Optional[torch.Tensor] = None + attention_mask: Optional[torch.LongTensor] = None # Copied from transformers.models.gemma.modeling_gemma.GemmaRMSNorm with Gemma->Moshi @@ -1091,7 +1091,7 @@ class MoshiDepthDecoder(MoshiPreTrainedModel, GenerationMixin): def forward( self, input_ids: Optional[torch.LongTensor] = None, - last_hidden_state: torch.LongTensor = None, + last_hidden_state: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.BoolTensor] = None, past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -1484,7 +1484,7 @@ class MoshiModel(MoshiPreTrainedModel): @add_start_docstrings_to_model_forward(MOSHI_DECODER_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1799,7 +1799,7 @@ class MoshiForCausalLM(MoshiPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoshiCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/mt5/modeling_mt5.py b/src/transformers/models/mt5/modeling_mt5.py index 2df643b781..f8fbc8e343 100644 --- a/src/transformers/models/mt5/modeling_mt5.py +++ b/src/transformers/models/mt5/modeling_mt5.py @@ -2183,7 +2183,7 @@ class MT5ForSequenceClassification(MT5PreTrainedModel): # Copied from transformers.models.t5.modeling_t5.T5ForSequenceClassification.forward def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index 82ac64c9a4..151c4e89f3 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -87,8 +87,8 @@ class MusicgenUnconditionalInput(ModelOutput): """ encoder_outputs: Tuple[torch.FloatTensor] = None - attention_mask: torch.LongTensor = None - guidance_scale: float = None + attention_mask: Optional[torch.LongTensor] = None + guidance_scale: Optional[float] = None def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int): @@ -966,7 +966,7 @@ class MusicgenDecoder(MusicgenPreTrainedModel): @add_start_docstrings_to_model_forward(MUSICGEN_DECODER_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1162,7 +1162,7 @@ class MusicgenModel(MusicgenPreTrainedModel): @add_start_docstrings_to_model_forward(MUSICGEN_DECODER_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1250,7 +1250,7 @@ class MusicgenForCausalLM(MusicgenPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index a43ecaa04c..70b914313c 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -97,7 +97,7 @@ class MusicgenMelodyOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -899,7 +899,7 @@ class MusicgenMelodyDecoder(MusicgenMelodyPreTrainedModel): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1075,7 +1075,7 @@ class MusicgenMelodyModel(MusicgenMelodyPreTrainedModel): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1162,7 +1162,7 @@ class MusicgenMelodyForCausalLM(MusicgenMelodyPreTrainedModel, GenerationMixin): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/mvp/modeling_mvp.py b/src/transformers/models/mvp/modeling_mvp.py index ea1d12af0c..2dec79729c 100644 --- a/src/transformers/models/mvp/modeling_mvp.py +++ b/src/transformers/models/mvp/modeling_mvp.py @@ -790,7 +790,7 @@ class MvpEncoder(MvpPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -987,7 +987,7 @@ class MvpDecoder(MvpPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1263,7 +1263,7 @@ class MvpModel(MvpPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1401,7 +1401,7 @@ class MvpForConditionalGeneration(MvpPreTrainedModel, GenerationMixin): @add_end_docstrings(MVP_CONDITIONAL_GENERATION_EXAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1523,7 +1523,7 @@ class MvpForSequenceClassification(MvpPreTrainedModel): @add_end_docstrings(MVP_SEQUENCE_CLASSIFICATION_SAMPLE) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1649,7 +1649,7 @@ class MvpForQuestionAnswering(MvpPreTrainedModel): @add_end_docstrings(MVP_QUESTION_ANSWERING_SAMPLE) def forward( self, - input_ids: torch.Tensor = None, + input_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1797,7 +1797,7 @@ class MvpForCausalLM(MvpPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/nemotron/modeling_nemotron.py b/src/transformers/models/nemotron/modeling_nemotron.py index d9cd4df990..cc1a22ab23 100644 --- a/src/transformers/models/nemotron/modeling_nemotron.py +++ b/src/transformers/models/nemotron/modeling_nemotron.py @@ -767,7 +767,7 @@ class NemotronModel(NemotronPreTrainedModel): @add_start_docstrings_to_model_forward(NEMOTRON_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1044,7 +1044,7 @@ class NemotronForCausalLM(NemotronPreTrainedModel, GenerationMixin): # Ignore copy (doc string different) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index 56c97c8870..8a2e735f9f 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -188,7 +188,10 @@ class NllbMoeSinusoidalPositionalEmbedding(nn.Module): @torch.no_grad() def forward( - self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0 + self, + input_ids: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, + past_key_values_length: int = 0, ): if input_ids is not None: bsz, seq_len = input_ids.size() diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 2cbd6e6729..d5251d4ff1 100644 --- a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -360,16 +360,16 @@ class NougatImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_crop_margin: bool = None, - do_resize: bool = None, + do_crop_margin: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_thumbnail: bool = None, - do_align_long_axis: bool = None, - do_pad: bool = None, - do_rescale: bool = None, + do_thumbnail: Optional[bool] = None, + do_align_long_axis: Optional[bool] = None, + do_pad: Optional[bool] = None, + do_rescale: Optional[bool] = None, rescale_factor: Union[int, float] = None, - do_normalize: bool = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/nougat/processing_nougat.py b/src/transformers/models/nougat/processing_nougat.py index 58a13454e8..ca395e261a 100644 --- a/src/transformers/models/nougat/processing_nougat.py +++ b/src/transformers/models/nougat/processing_nougat.py @@ -50,16 +50,16 @@ class NougatProcessor(ProcessorMixin): self, images=None, text=None, - do_crop_margin: bool = None, - do_resize: bool = None, + do_crop_margin: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: "PILImageResampling" = None, # noqa: F821 - do_thumbnail: bool = None, - do_align_long_axis: bool = None, - do_pad: bool = None, - do_rescale: bool = None, + do_thumbnail: Optional[bool] = None, + do_align_long_axis: Optional[bool] = None, + do_pad: Optional[bool] = None, + do_rescale: Optional[bool] = None, rescale_factor: Union[int, float] = None, - do_normalize: bool = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional["ChannelDimension"] = "channels_first", # noqa: F821 diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 749b729b12..d6b772a1af 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -498,7 +498,7 @@ class OlmoModel(OlmoPreTrainedModel): @add_start_docstrings_to_model_forward(OLMO_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -773,7 +773,7 @@ class OlmoForCausalLM(OlmoPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/olmo2/modeling_olmo2.py b/src/transformers/models/olmo2/modeling_olmo2.py index 35f0376f2d..4cb4a9fe72 100644 --- a/src/transformers/models/olmo2/modeling_olmo2.py +++ b/src/transformers/models/olmo2/modeling_olmo2.py @@ -499,7 +499,7 @@ class Olmo2Model(Olmo2PreTrainedModel): @add_start_docstrings_to_model_forward(OLMO2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -774,7 +774,7 @@ class Olmo2ForCausalLM(Olmo2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/olmoe/modeling_olmoe.py b/src/transformers/models/olmoe/modeling_olmoe.py index 9589e4dd7a..a557530663 100644 --- a/src/transformers/models/olmoe/modeling_olmoe.py +++ b/src/transformers/models/olmoe/modeling_olmoe.py @@ -898,7 +898,7 @@ class OlmoeModel(OlmoePreTrainedModel): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -1191,7 +1191,7 @@ class OlmoeForCausalLM(OlmoePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py index e95e4a522b..67143cff68 100644 --- a/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/modeling_omdet_turbo.py @@ -68,7 +68,7 @@ class OmDetTurboEncoderOutput(ModelOutput): The extracted states from the Feature Pyramid Network (FPN) and Path Aggregation Network (PAN) of the encoder. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None extracted_states: Tuple[torch.FloatTensor] = None @@ -104,14 +104,14 @@ class OmDetTurboDecoderOutput(ModelOutput): weighted average in the self-attention, cross-attention and multi-scale deformable attention heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None - decoder_coords: torch.FloatTensor = None - decoder_classes: torch.FloatTensor = None - encoder_coord_logits: torch.FloatTensor = None + decoder_coords: Optional[torch.FloatTensor] = None + decoder_classes: Optional[torch.FloatTensor] = None + encoder_coord_logits: Optional[torch.FloatTensor] = None encoder_class_logits: Tuple[torch.FloatTensor] = None - init_reference_points: torch.FloatTensor = None + init_reference_points: Optional[torch.FloatTensor] = None intermediate_reference_points: Tuple[Tuple[torch.FloatTensor]] = None @@ -157,14 +157,14 @@ class OmDetTurboObjectDetectionOutput(ModelOutput): The number of queried classes for each image. """ - loss: torch.FloatTensor = None - decoder_coord_logits: torch.FloatTensor = None - decoder_class_logits: torch.FloatTensor = None - init_reference_points: torch.FloatTensor = None + loss: Optional[torch.FloatTensor] = None + decoder_coord_logits: Optional[torch.FloatTensor] = None + decoder_class_logits: Optional[torch.FloatTensor] = None + init_reference_points: Optional[torch.FloatTensor] = None intermediate_reference_points: Optional[Tuple[Tuple[torch.FloatTensor]]] = None - encoder_coord_logits: torch.FloatTensor = None + encoder_coord_logits: Optional[torch.FloatTensor] = None encoder_class_logits: Tuple[torch.FloatTensor] = None - encoder_extracted_states: torch.FloatTensor = None + encoder_extracted_states: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -579,7 +579,7 @@ class OmDetTurboEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, output_attentions: bool = False, ): """ diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index cbce106f33..956bd3e7e2 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -582,12 +582,12 @@ class OneFormerImageProcessor(BaseImageProcessor): def _preprocess( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -603,12 +603,12 @@ class OneFormerImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -643,7 +643,7 @@ class OneFormerImageProcessor(BaseImageProcessor): def _preprocess_mask( self, segmentation_map: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/oneformer/modeling_oneformer.py b/src/transformers/models/oneformer/modeling_oneformer.py index eef4b6a3c2..7c3ecb3611 100644 --- a/src/transformers/models/oneformer/modeling_oneformer.py +++ b/src/transformers/models/oneformer/modeling_oneformer.py @@ -358,7 +358,7 @@ class OneFormerLoss(nn.Module): num_points: int, oversample_ratio: float, importance_sample_ratio: float, - contrastive_temperature: float = None, + contrastive_temperature: Optional[float] = None, ): """ This class computes the losses using the class predictions, mask predictions and the contrastive queries. @@ -754,10 +754,10 @@ class OneFormerTransformerDecoderOutput(BaseModelOutput): Tuple of class and mask predictions from each layer of the transformer decoder. """ - object_queries: torch.FloatTensor = None + object_queries: Optional[torch.FloatTensor] = None contrastive_logits: Optional[torch.FloatTensor] = None - prediction_masks: torch.FloatTensor = None - prediction_class: torch.FloatTensor = None + prediction_masks: Optional[torch.FloatTensor] = None + prediction_class: Optional[torch.FloatTensor] = None auxiliary_predictions: Optional[Tuple[Dict[str, torch.FloatTensor]]] = None @@ -782,7 +782,7 @@ class OneFormerPixelDecoderOutput(ModelOutput): """ multi_scale_features: Tuple[torch.FloatTensor] = None - mask_features: torch.FloatTensor = None + mask_features: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -806,7 +806,7 @@ class OneFormerPixelLevelModuleOutput(ModelOutput): encoder_features: List[torch.FloatTensor] = None decoder_features: List[torch.FloatTensor] = None - decoder_last_feature: torch.FloatTensor = None + decoder_last_feature: Optional[torch.FloatTensor] = None @dataclass @@ -849,13 +849,13 @@ class OneFormerModelOutput(ModelOutput): encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None pixel_decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None transformer_decoder_hidden_states: Optional[torch.FloatTensor] = None - transformer_decoder_object_queries: torch.FloatTensor = None + transformer_decoder_object_queries: Optional[torch.FloatTensor] = None transformer_decoder_contrastive_queries: Optional[torch.FloatTensor] = None - transformer_decoder_mask_predictions: torch.FloatTensor = None - transformer_decoder_class_predictions: torch.FloatTensor = None + transformer_decoder_mask_predictions: Optional[torch.FloatTensor] = None + transformer_decoder_class_predictions: Optional[torch.FloatTensor] = None transformer_decoder_auxiliary_predictions: Optional[Tuple[Dict[str, torch.FloatTensor]]] = None text_queries: Optional[torch.FloatTensor] = None - task_token: torch.FloatTensor = None + task_token: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -912,19 +912,19 @@ class OneFormerForUniversalSegmentationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - class_queries_logits: torch.FloatTensor = None - masks_queries_logits: torch.FloatTensor = None + class_queries_logits: Optional[torch.FloatTensor] = None + masks_queries_logits: Optional[torch.FloatTensor] = None auxiliary_predictions: List[Dict[str, torch.FloatTensor]] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None pixel_decoder_hidden_states: Optional[List[torch.FloatTensor]] = None transformer_decoder_hidden_states: Optional[torch.FloatTensor] = None - transformer_decoder_object_queries: torch.FloatTensor = None + transformer_decoder_object_queries: Optional[torch.FloatTensor] = None transformer_decoder_contrastive_queries: Optional[torch.FloatTensor] = None - transformer_decoder_mask_predictions: torch.FloatTensor = None - transformer_decoder_class_predictions: torch.FloatTensor = None + transformer_decoder_mask_predictions: Optional[torch.FloatTensor] = None + transformer_decoder_class_predictions: Optional[torch.FloatTensor] = None transformer_decoder_auxiliary_predictions: Optional[List[Dict[str, torch.FloatTensor]]] = None text_queries: Optional[torch.FloatTensor] = None - task_token: torch.FloatTensor = None + task_token: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None @@ -1085,7 +1085,7 @@ class OneFormerPixelDecoderEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, reference_points=None, spatial_shapes=None, level_start_index=None, @@ -2609,7 +2609,7 @@ class OneFormerTextTransformer(nn.Module): width: int, layers: int, heads: int, - attn_mask: torch.Tensor = None, + attn_mask: Optional[torch.Tensor] = None, use_checkpoint=False, layer_norm_eps=1e-05, ): diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index f777d95bf8..595d3e9373 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -318,8 +318,8 @@ class OpenAIGPTDoubleHeadsModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None mc_loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - mc_logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + mc_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 3dd0a6b86b..3856711d10 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -429,8 +429,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput): heads. """ - logits: tf.Tensor = None - mc_logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None + mc_logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/opt/modeling_opt.py b/src/transformers/models/opt/modeling_opt.py index 729f71a49a..01639f5b66 100644 --- a/src/transformers/models/opt/modeling_opt.py +++ b/src/transformers/models/opt/modeling_opt.py @@ -767,7 +767,7 @@ class OPTDecoder(OPTPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1007,7 +1007,7 @@ class OPTModel(OPTPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1086,7 +1086,7 @@ class OPTForCausalLM(OPTPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index 77ec2c7192..56c8075ef0 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -369,12 +369,12 @@ class Owlv2ImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_pad: bool = None, - do_resize: bool = None, + do_pad: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/owlv2/modeling_owlv2.py b/src/transformers/models/owlv2/modeling_owlv2.py index d69bcaa87f..7d83ab0004 100644 --- a/src/transformers/models/owlv2/modeling_owlv2.py +++ b/src/transformers/models/owlv2/modeling_owlv2.py @@ -85,10 +85,10 @@ class Owlv2Output(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -205,12 +205,12 @@ class Owlv2ObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - objectness_logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None - class_embeds: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + objectness_logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None + class_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -255,12 +255,12 @@ class Owlv2ImageGuidedObjectDetectionOutput(ModelOutput): The output of the [`Owlv2VisionModel`]. """ - logits: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None - query_image_embeds: torch.FloatTensor = None - target_pred_boxes: torch.FloatTensor = None - query_pred_boxes: torch.FloatTensor = None - class_embeds: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None + query_image_embeds: Optional[torch.FloatTensor] = None + target_pred_boxes: Optional[torch.FloatTensor] = None + query_pred_boxes: Optional[torch.FloatTensor] = None + class_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/owlvit/modeling_owlvit.py b/src/transformers/models/owlvit/modeling_owlvit.py index d9c0e72409..2eb0114cf1 100644 --- a/src/transformers/models/owlvit/modeling_owlvit.py +++ b/src/transformers/models/owlvit/modeling_owlvit.py @@ -85,10 +85,10 @@ class OwlViTOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -202,11 +202,11 @@ class OwlViTObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None - class_embeds: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None + class_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None @@ -250,12 +250,12 @@ class OwlViTImageGuidedObjectDetectionOutput(ModelOutput): The output of the [`OwlViTVisionModel`]. """ - logits: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None - query_image_embeds: torch.FloatTensor = None - target_pred_boxes: torch.FloatTensor = None - query_pred_boxes: torch.FloatTensor = None - class_embeds: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None + query_image_embeds: Optional[torch.FloatTensor] = None + target_pred_boxes: Optional[torch.FloatTensor] = None + query_pred_boxes: Optional[torch.FloatTensor] = None + class_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/paligemma/modeling_paligemma.py b/src/transformers/models/paligemma/modeling_paligemma.py index f14e2d4535..ef92378e0b 100644 --- a/src/transformers/models/paligemma/modeling_paligemma.py +++ b/src/transformers/models/paligemma/modeling_paligemma.py @@ -55,7 +55,7 @@ def _prepare_4d_causal_attention_mask_with_cache_position( cache_position: torch.Tensor, batch_size: int, is_training: bool = False, - token_type_ids: torch.Tensor = None, + token_type_ids: Optional[torch.Tensor] = None, **kwargs, ): """ @@ -145,7 +145,7 @@ class PaliGemmaCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -339,7 +339,7 @@ class PaliGemmaForConditionalGeneration(PaliGemmaPreTrainedModel, GenerationMixi past_key_values=None, cache_position=None, input_tensor=None, - is_training: bool = None, + is_training: Optional[bool] = None, ): if self.config.text_config._attn_implementation == "flash_attention_2": if attention_mask is not None and 0.0 in attention_mask: @@ -421,8 +421,8 @@ class PaliGemmaForConditionalGeneration(PaliGemmaPreTrainedModel, GenerationMixi @replace_return_docstrings(output_type=PaliGemmaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, diff --git a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py index 37b7641606..ca88a84a39 100644 --- a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py @@ -1153,7 +1153,7 @@ class PatchTSMixerNOPScaler(nn.Module): self.keepdim = config.keepdim if hasattr(config, "keepdim") else True def forward( - self, data: torch.Tensor, observed_indicator: torch.Tensor = None + self, data: torch.Tensor, observed_indicator: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Parameters: @@ -1181,7 +1181,7 @@ class PatchTSMixerEncoderOutput(ModelOutput): Hidden-states of the model at the output of each layer. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -1283,9 +1283,9 @@ class PatchTSMixerModelOutput(ModelOutput): enabled. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None - patch_input: torch.FloatTensor = None + patch_input: Optional[torch.FloatTensor] = None mask: Optional[torch.FloatTensor] = None loc: Optional[torch.FloatTensor] = None scale: Optional[torch.FloatTensor] = None @@ -1402,8 +1402,8 @@ class PatchTSMixerForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_outputs: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + prediction_outputs: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -1521,11 +1521,11 @@ class PatchTSMixerForPredictionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_outputs: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + prediction_outputs: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None - loc: torch.FloatTensor = None - scale: torch.FloatTensor = None + loc: Optional[torch.FloatTensor] = None + scale: Optional[torch.FloatTensor] = None @dataclass @@ -1539,7 +1539,7 @@ class SamplePatchTSMixerPredictionOutput(ModelOutput): Sampled values from the chosen distribution. """ - sequences: torch.FloatTensor = None + sequences: Optional[torch.FloatTensor] = None @dataclass @@ -1553,7 +1553,7 @@ class SamplePatchTSMixerRegressionOutput(ModelOutput): Sampled values from the chosen distribution. """ - sequences: torch.FloatTensor = None + sequences: Optional[torch.FloatTensor] = None # Copied from transformers.models.time_series_transformer.modeling_time_series_transformer.nll @@ -1817,8 +1817,8 @@ class PatchTSMixerForTimeSeriesClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_outputs: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + prediction_outputs: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -1859,7 +1859,7 @@ class PatchTSMixerForTimeSeriesClassification(PatchTSMixerPreTrainedModel): def forward( self, past_values: torch.Tensor, - target_values: torch.Tensor = None, + target_values: Optional[torch.Tensor] = None, output_hidden_states: Optional[bool] = False, return_loss: bool = True, return_dict: Optional[bool] = None, @@ -1948,8 +1948,8 @@ class PatchTSMixerForRegressionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - regression_outputs: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + regression_outputs: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -2049,7 +2049,7 @@ class PatchTSMixerForRegression(PatchTSMixerPreTrainedModel): def forward( self, past_values: torch.Tensor, - target_values: torch.Tensor = None, + target_values: Optional[torch.Tensor] = None, output_hidden_states: Optional[bool] = False, return_loss: bool = True, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/patchtst/modeling_patchtst.py b/src/transformers/models/patchtst/modeling_patchtst.py index 645bbfbbd1..ae09d410ac 100755 --- a/src/transformers/models/patchtst/modeling_patchtst.py +++ b/src/transformers/models/patchtst/modeling_patchtst.py @@ -813,13 +813,13 @@ class PatchTSTModelOutput(ModelOutput): Patched input to the Transformer """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None - mask: torch.FloatTensor = None - loc: torch.FloatTensor = None - scale: torch.FloatTensor = None - patch_input: torch.FloatTensor = None + mask: Optional[torch.FloatTensor] = None + loc: Optional[torch.FloatTensor] = None + scale: Optional[torch.FloatTensor] = None + patch_input: Optional[torch.FloatTensor] = None @dataclass @@ -846,7 +846,7 @@ class PatchTSTForPretrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_output: torch.FloatTensor = None + prediction_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -875,7 +875,7 @@ class PatchTSTForRegressionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - regression_outputs: torch.FloatTensor = None + regression_outputs: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -908,11 +908,11 @@ class PatchTSTForPredictionOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_outputs: torch.FloatTensor = None + prediction_outputs: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None - loc: torch.FloatTensor = None - scale: torch.FloatTensor = None + loc: Optional[torch.FloatTensor] = None + scale: Optional[torch.FloatTensor] = None @dataclass @@ -940,7 +940,7 @@ class PatchTSTForClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -956,7 +956,7 @@ class SamplePatchTSTOutput(ModelOutput): Sampled values from the chosen distribution. """ - sequences: torch.FloatTensor = None + sequences: Optional[torch.FloatTensor] = None # Copied from transformers.models.time_series_transformer.modeling_time_series_transformer.nll @@ -1095,7 +1095,7 @@ class PatchTSTNOPScaler(nn.Module): self.keepdim = config.keepdim if hasattr(config, "keepdim") else True def forward( - self, data: torch.Tensor, observed_indicator: torch.Tensor = None + self, data: torch.Tensor, observed_indicator: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Parameters: @@ -1457,7 +1457,7 @@ class PatchTSTForClassification(PatchTSTPreTrainedModel): def forward( self, past_values: torch.Tensor, - target_values: torch.Tensor = None, + target_values: Optional[torch.Tensor] = None, past_observed_mask: Optional[bool] = None, output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1910,7 +1910,7 @@ class PatchTSTForRegression(PatchTSTPreTrainedModel): def forward( self, past_values: torch.Tensor, - target_values: torch.Tensor = None, + target_values: Optional[torch.Tensor] = None, past_observed_mask: Optional[torch.Tensor] = None, output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index 11bb17cd28..b4c5a0acdc 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -1478,7 +1478,7 @@ class PegasusForCausalLM(PegasusPreTrainedModel, GenerationMixin): # Copied from transformers.models.bart.modeling_bart.BartForCausalLM.forward with Bart->Pegasus, facebook/bart-base->google/pegasus-large def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index c7212ec3a7..047fd5b6ba 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -77,8 +77,8 @@ class PerceiverModelOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - logits: torch.FloatTensor = None - last_hidden_state: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -98,7 +98,7 @@ class PerceiverDecoderOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -127,7 +127,7 @@ class PerceiverMaskedLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -159,7 +159,7 @@ class PerceiverClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -2862,7 +2862,7 @@ class PerceiverFourierPositionEncoding(PerceiverAbstractPositionEncoding): batch_size: int, device: torch.device, dtype: torch.dtype, - pos: torch.FloatTensor = None, + pos: Optional[torch.FloatTensor] = None, ) -> torch.FloatTensor: pos = _check_or_build_spatial_positions(pos, index_dims, batch_size) fourier_pos_enc = generate_fourier_features( diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 5c9c62cc46..111ca31433 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -557,7 +557,7 @@ class PersimmonModel(PersimmonPreTrainedModel): @add_start_docstrings_to_model_forward(PERSIMMON_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -849,7 +849,7 @@ class PersimmonForCausalLM(PersimmonPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 539be9216d..1bd445366e 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -495,7 +495,7 @@ class PhiModel(PhiPreTrainedModel): @add_start_docstrings_to_model_forward(PHI_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -767,7 +767,7 @@ class PhiForCausalLM(PhiPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/phi/modular_phi.py b/src/transformers/models/phi/modular_phi.py index e01d433aa1..cb9b158338 100644 --- a/src/transformers/models/phi/modular_phi.py +++ b/src/transformers/models/phi/modular_phi.py @@ -181,7 +181,7 @@ class PhiModel(LlamaModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 0d8238683e..24e86bca14 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -560,7 +560,7 @@ class Phi3Model(Phi3PreTrainedModel): @add_start_docstrings_to_model_forward(PHI3_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -859,7 +859,7 @@ class Phi3ForCausalLM(Phi3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py index ea1a7384f5..ed38b65ef7 100644 --- a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py @@ -1842,7 +1842,7 @@ class Phi4MultimodalModel(Phi4MultimodalPreTrainedModel): @add_start_docstrings_to_model_forward(PHI4_MULTIMODAL_MODEL_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -2149,7 +2149,7 @@ class Phi4MultimodalForCausalLM(Phi4MultimodalPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=Phi4MultimodalConfig) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py index c9e9f209ad..901cfa27b0 100644 --- a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py @@ -1549,7 +1549,7 @@ class Phi4MultimodalModel(Phi3Model, nn.Module): @add_start_docstrings_to_model_forward(PHI4_MULTIMODAL_MODEL_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1684,7 +1684,7 @@ class Phi4MultimodalForCausalLM(Phi3ForCausalLM, nn.Module): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=Phi4MultimodalConfig) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/phimoe/modeling_phimoe.py b/src/transformers/models/phimoe/modeling_phimoe.py index 89e82b7d6b..9cbfe776bb 100644 --- a/src/transformers/models/phimoe/modeling_phimoe.py +++ b/src/transformers/models/phimoe/modeling_phimoe.py @@ -1036,7 +1036,7 @@ class PhimoeModel(PhimoePreTrainedModel): @add_start_docstrings_to_model_forward(PHIMOE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1366,7 +1366,7 @@ class PhimoeForCausalLM(PhimoePreTrainedModel, GenerationMixin): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index 386ac83b61..e9db5175b2 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -349,7 +349,7 @@ class Pix2StructImageProcessor(BaseImageProcessor): self, images: ImageInput, header_text: Optional[str] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, do_normalize: Optional[bool] = None, max_patches: Optional[int] = None, patch_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/pixtral/image_processing_pixtral.py b/src/transformers/models/pixtral/image_processing_pixtral.py index 2cb452863a..074a8d1076 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral.py +++ b/src/transformers/models/pixtral/image_processing_pixtral.py @@ -319,16 +319,16 @@ class PixtralImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, patch_size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index e2f11d97b8..e625c20251 100644 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -701,7 +701,7 @@ class PLBartEncoder(PLBartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, head_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -887,7 +887,7 @@ class PLBartDecoder(PLBartPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1421,7 +1421,7 @@ class PLBartForSequenceClassification(PLBartPreTrainedModel): # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -1570,7 +1570,7 @@ class PLBartForCausalLM(PLBartPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index 61061ec1f5..cd4c4bb770 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -213,15 +213,15 @@ class PoolFormerImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, crop_pct: Optional[int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/pop2piano/modeling_pop2piano.py b/src/transformers/models/pop2piano/modeling_pop2piano.py index 2e4734aed8..73ff1b9d7b 100644 --- a/src/transformers/models/pop2piano/modeling_pop2piano.py +++ b/src/transformers/models/pop2piano/modeling_pop2piano.py @@ -1214,7 +1214,7 @@ class Pop2PianoForConditionalGeneration(Pop2PianoPreTrainedModel, GenerationMixi input_features: torch.FloatTensor, composer: str, generation_config: GenerationConfig, - attention_mask: torch.FloatTensor = None, + attention_mask: Optional[torch.FloatTensor] = None, ): """ This method is used to concatenate mel conditioner tokens at the front of the input_features in order to diff --git a/src/transformers/models/pop2piano/tokenization_pop2piano.py b/src/transformers/models/pop2piano/tokenization_pop2piano.py index 18adb2e962..678a651fee 100644 --- a/src/transformers/models/pop2piano/tokenization_pop2piano.py +++ b/src/transformers/models/pop2piano/tokenization_pop2piano.py @@ -245,7 +245,9 @@ class Pop2PianoTokenizer(PreTrainedTokenizer): # Taken from the original code # Please see https://github.com/sweetcocoa/pop2piano/blob/fac11e8dcfc73487513f4588e8d0c22a22f2fdc5/midi_tokenizer.py#L257 - def relative_tokens_ids_to_notes(self, tokens: np.ndarray, start_idx: float, cutoff_time_idx: float = None): + def relative_tokens_ids_to_notes( + self, tokens: np.ndarray, start_idx: float, cutoff_time_idx: Optional[float] = None + ): """ Converts relative tokens to notes which will then be used to create Pretty Midi objects. diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py index c7230ddc7a..ddb72211b0 100644 --- a/src/transformers/models/prophetnet/modeling_prophetnet.py +++ b/src/transformers/models/prophetnet/modeling_prophetnet.py @@ -308,7 +308,7 @@ class ProphetNetSeq2SeqLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None logits_ngram: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[torch.FloatTensor]] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -528,7 +528,7 @@ class ProphetNetDecoderLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None logits_ngram: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 5d5464dcae..586d804372 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -500,7 +500,7 @@ class Qwen2Model(Qwen2PreTrainedModel): @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -799,7 +799,7 @@ class Qwen2ForCausalLM(Qwen2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index 62fb093bc9..21a7b710cf 100644 --- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -1114,7 +1114,7 @@ class Qwen2_5_VLModel(Qwen2_5_VLPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1414,7 +1414,7 @@ class Qwen2_5_VLCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1712,7 +1712,7 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi @replace_return_docstrings(output_type=Qwen2_5_VLCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py index 10100d8f33..925d59df2f 100644 --- a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py @@ -585,7 +585,7 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py index 37d38c429a..e375272af9 100644 --- a/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py +++ b/src/transformers/models/qwen2_audio/modeling_qwen2_audio.py @@ -83,7 +83,7 @@ class Qwen2AudioCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[Cache] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1020,8 +1020,8 @@ class Qwen2AudioForConditionalGeneration(Qwen2AudioPreTrainedModel, GenerationMi @replace_return_docstrings(output_type=Qwen2AudioCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - input_features: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + input_features: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, feature_attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index 5e5be0a8f1..b7075fba3b 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -926,7 +926,7 @@ class Qwen2MoeModel(Qwen2MoePreTrainedModel): @add_start_docstrings_to_model_forward(QWEN2MOE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1251,7 +1251,7 @@ class Qwen2MoeForCausalLM(Qwen2MoePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py index 81136cb00a..10bc8bc69a 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py @@ -169,18 +169,18 @@ class Qwen2VLImageProcessor(BaseImageProcessor): def _preprocess( self, images: Union[ImageInput, VideoInput], - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, patch_size: Optional[int] = None, temporal_patch_size: Optional[int] = None, merge_size: Optional[int] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -302,20 +302,20 @@ class Qwen2VLImageProcessor(BaseImageProcessor): self, images: ImageInput, videos: VideoInput = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, patch_size: Optional[int] = None, temporal_patch_size: Optional[int] = None, merge_size: Optional[int] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py index 21084b1dd3..661f5ed8b1 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py @@ -255,12 +255,12 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast): self, images: ImageInput, videos: VideoInput = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, min_pixels: Optional[int] = None, @@ -268,7 +268,7 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast): patch_size: Optional[int] = None, temporal_patch_size: Optional[int] = None, merge_size: Optional[int] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py index b0c485a9e9..6680a0cc8b 100644 --- a/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py @@ -87,7 +87,7 @@ class Qwen2VLCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1067,7 +1067,7 @@ class Qwen2VLModel(Qwen2VLPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1598,7 +1598,7 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=Qwen2VLCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/qwen3/modeling_qwen3.py b/src/transformers/models/qwen3/modeling_qwen3.py index 0559ec789b..30abb9701d 100644 --- a/src/transformers/models/qwen3/modeling_qwen3.py +++ b/src/transformers/models/qwen3/modeling_qwen3.py @@ -527,7 +527,7 @@ class Qwen3Model(Qwen3PreTrainedModel): @add_start_docstrings_to_model_forward(QWEN3_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, @@ -826,7 +826,7 @@ class Qwen3ForCausalLM(Qwen3PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py index aa1418bd1c..b9ffae15a2 100644 --- a/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/modeling_qwen3_moe.py @@ -622,7 +622,7 @@ class Qwen3MoeModel(Qwen3MoePreTrainedModel): @add_start_docstrings_to_model_forward(QWEN3_MOE_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1013,7 +1013,7 @@ class Qwen3MoeForCausalLM(Qwen3MoePreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/qwen3_moe/modular_qwen3_moe.py b/src/transformers/models/qwen3_moe/modular_qwen3_moe.py index 3e80e28a0c..d8a5cc54f2 100644 --- a/src/transformers/models/qwen3_moe/modular_qwen3_moe.py +++ b/src/transformers/models/qwen3_moe/modular_qwen3_moe.py @@ -244,7 +244,7 @@ class Qwen3MoeForCausalLM(MixtralForCausalLM): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index c44ba0f4ff..c2258d9767 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -112,8 +112,8 @@ class RetrievAugLMMarginOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - doc_scores: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + doc_scores: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None retrieved_doc_embeds: Optional[torch.FloatTensor] = None retrieved_doc_ids: Optional[torch.LongTensor] = None @@ -202,8 +202,8 @@ class RetrievAugLMOutput(ModelOutput): weighted average in the cross-attention heads. """ - logits: torch.FloatTensor = None - doc_scores: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + doc_scores: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None retrieved_doc_embeds: Optional[torch.FloatTensor] = None retrieved_doc_ids: Optional[torch.LongTensor] = None diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 5c27ad4aaf..9c670683c9 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -115,7 +115,7 @@ class TFRetrievAugLMMarginOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None doc_scores: tf.Tensor | None = None retrieved_doc_embeds: tf.Tensor | None = None @@ -198,7 +198,7 @@ class TFRetrievAugLMOutput(ModelOutput): average in the self-attention heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None past_key_values: List[tf.Tensor] | None = None doc_scores: tf.Tensor | None = None retrieved_doc_embeds: tf.Tensor | None = None diff --git a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py index 4ae3de5a83..450da13493 100644 --- a/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +++ b/src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py @@ -492,8 +492,8 @@ class RecurrentGemmaDecoderLayer(nn.Module): activations: torch.Tensor, position_ids: torch.Tensor, attention_mask: torch.Tensor, - cache_position: torch.Tensor = None, - use_cache: bool = None, + cache_position: Optional[torch.Tensor] = None, + use_cache: Optional[bool] = None, ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: raw_activations = activations inputs_normalized = self.temporal_pre_norm(raw_activations) # RMSNorm introduces slight slight differences @@ -677,7 +677,7 @@ class RecurrentGemmaModel(RecurrentGemmaPreTrainedModel): @add_start_docstrings_to_model_forward(RECURRENTGEMMA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, cache_position: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 0fe930bd81..48be78e7d4 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -1885,7 +1885,7 @@ class ReformerModelWithLMHeadOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_buckets_states: Optional[List[Tuple[torch.LongTensor, torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index 66ba88b40d..a8942b1e7c 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -776,7 +776,7 @@ class RemBertModel(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -931,7 +931,7 @@ class RemBertForMaskedLM(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -1028,7 +1028,7 @@ class RemBertForCausalLM(RemBertPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.LongTensor] = None, @@ -1164,7 +1164,7 @@ class RemBertForSequenceClassification(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.FloatTensor = None, + input_ids: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.FloatTensor] = None, @@ -1260,7 +1260,7 @@ class RemBertForMultipleChoice(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.FloatTensor = None, + input_ids: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.FloatTensor] = None, @@ -1352,7 +1352,7 @@ class RemBertForTokenClassification(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.FloatTensor = None, + input_ids: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.FloatTensor] = None, @@ -1430,7 +1430,7 @@ class RemBertForQuestionAnswering(RemBertPreTrainedModel): ) def forward( self, - input_ids: torch.FloatTensor = None, + input_ids: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.FloatTensor] = None, token_type_ids: Optional[torch.LongTensor] = None, position_ids: Optional[torch.FloatTensor] = None, diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 733defb447..4a21ee48d3 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -106,10 +106,10 @@ class TFRemBertEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, past_key_values_length=0, training: bool = False, ) -> tf.Tensor: diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py index 4590cccf4c..0f32c04f45 100644 --- a/src/transformers/models/resnet/modeling_tf_resnet.py +++ b/src/transformers/models/resnet/modeling_tf_resnet.py @@ -552,10 +552,10 @@ class TFResNetForImageClassification(TFResNetPreTrainedModel, TFSequenceClassifi @unpack_inputs def call( self, - pixel_values: tf.Tensor = None, - labels: tf.Tensor = None, - output_hidden_states: bool = None, - return_dict: bool = None, + pixel_values: Optional[tf.Tensor] = None, + labels: Optional[tf.Tensor] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, training: bool = False, ) -> Union[Tuple[tf.Tensor], TFImageClassifierOutputWithNoAttention]: r""" diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 4e7c0be16f..738f8e67e9 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -156,9 +156,9 @@ class TFRoFormerEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, training: bool = False, ) -> tf.Tensor: """ diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr.py b/src/transformers/models/rt_detr/image_processing_rt_detr.py index b3e75972cc..e458de3794 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr.py @@ -477,7 +477,7 @@ class RTDetrImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -784,7 +784,7 @@ class RTDetrImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py index 8e96a13b20..dd0c54cc63 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py @@ -174,7 +174,7 @@ class RTDetrImageProcessorFast(BaseImageProcessorFast): image: torch.Tensor, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: diff --git a/src/transformers/models/rt_detr/modeling_rt_detr.py b/src/transformers/models/rt_detr/modeling_rt_detr.py index d7305d4265..727925fb17 100644 --- a/src/transformers/models/rt_detr/modeling_rt_detr.py +++ b/src/transformers/models/rt_detr/modeling_rt_detr.py @@ -137,10 +137,10 @@ class RTDetrDecoderOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -200,17 +200,17 @@ class RTDetrModelOutput(ModelOutput): Extra dictionary for the denoising related values """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None encoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None - init_reference_points: torch.FloatTensor = None + init_reference_points: Optional[torch.FloatTensor] = None enc_topk_logits: Optional[torch.FloatTensor] = None enc_topk_bboxes: Optional[torch.FloatTensor] = None enc_outputs_class: Optional[torch.FloatTensor] = None @@ -289,13 +289,13 @@ class RTDetrObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -586,7 +586,7 @@ class RTDetrEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, output_attentions: bool = False, **kwargs, ): diff --git a/src/transformers/models/rt_detr/modular_rt_detr.py b/src/transformers/models/rt_detr/modular_rt_detr.py index 74a849dd4a..fd9913e97e 100644 --- a/src/transformers/models/rt_detr/modular_rt_detr.py +++ b/src/transformers/models/rt_detr/modular_rt_detr.py @@ -188,7 +188,7 @@ class RTDetrImageProcessorFast(DetrImageProcessorFast, BaseImageProcessorFast): image: torch.Tensor, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: diff --git a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py index f707f5af27..59a00a6e74 100644 --- a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py @@ -486,10 +486,10 @@ class RTDetrV2DecoderOutput(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -549,17 +549,17 @@ class RTDetrV2ModelOutput(ModelOutput): Extra dictionary for the denoising related values """ - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None encoder_last_hidden_state: Optional[torch.FloatTensor] = None encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None - init_reference_points: torch.FloatTensor = None + init_reference_points: Optional[torch.FloatTensor] = None enc_topk_logits: Optional[torch.FloatTensor] = None enc_topk_bboxes: Optional[torch.FloatTensor] = None enc_outputs_class: Optional[torch.FloatTensor] = None @@ -638,13 +638,13 @@ class RTDetrV2ObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None - last_hidden_state: torch.FloatTensor = None - intermediate_hidden_states: torch.FloatTensor = None - intermediate_logits: torch.FloatTensor = None - intermediate_reference_points: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + intermediate_hidden_states: Optional[torch.FloatTensor] = None + intermediate_logits: Optional[torch.FloatTensor] = None + intermediate_reference_points: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None cross_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -798,7 +798,7 @@ class RTDetrV2EncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - position_embeddings: torch.Tensor = None, + position_embeddings: Optional[torch.Tensor] = None, output_attentions: bool = False, **kwargs, ): diff --git a/src/transformers/models/rwkv/modeling_rwkv.py b/src/transformers/models/rwkv/modeling_rwkv.py index 5636522995..6274464309 100644 --- a/src/transformers/models/rwkv/modeling_rwkv.py +++ b/src/transformers/models/rwkv/modeling_rwkv.py @@ -478,7 +478,7 @@ class RwkvOutput(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None state: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -511,7 +511,7 @@ class RwkvCausalLMOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None state: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index c80a8a290b..5d98674f73 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -296,7 +296,7 @@ class SamImageProcessor(BaseImageProcessor): do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, + do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, diff --git a/src/transformers/models/sam/modeling_sam.py b/src/transformers/models/sam/modeling_sam.py index b25d1b7318..549eb8a317 100644 --- a/src/transformers/models/sam/modeling_sam.py +++ b/src/transformers/models/sam/modeling_sam.py @@ -69,7 +69,7 @@ class SamVisionEncoderOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -103,8 +103,8 @@ class SamImageSegmentationOutput(ModelOutput): heads. """ - iou_scores: torch.FloatTensor = None - pred_masks: torch.FloatTensor = None + iou_scores: Optional[torch.FloatTensor] = None + pred_masks: Optional[torch.FloatTensor] = None vision_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None vision_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None mask_decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -223,7 +223,9 @@ class SamAttention(nn.Module): hidden_states = hidden_states.transpose(1, 2) return hidden_states.reshape(batch // point_batch_size, point_batch_size, n_tokens, n_heads * c_per_head) - def forward(self, query: Tensor, key: Tensor, value: Tensor, attention_similarity: Tensor = None) -> Tensor: + def forward( + self, query: Tensor, key: Tensor, value: Tensor, attention_similarity: Optional[Tensor] = None + ) -> Tensor: # Input projections query = self.q_proj(query) key = self.k_proj(key) @@ -262,7 +264,9 @@ class SamSdpaAttention(SamAttention): def __init__(self, config, downsample_rate=None): super().__init__(config, downsample_rate) - def forward(self, query: Tensor, key: Tensor, value: Tensor, attention_similarity: Tensor = None) -> Tensor: + def forward( + self, query: Tensor, key: Tensor, value: Tensor, attention_similarity: Optional[Tensor] = None + ) -> Tensor: # Input projections query = self.q_proj(query) key = self.k_proj(key) @@ -514,8 +518,8 @@ class SamMaskDecoder(nn.Module): dense_prompt_embeddings: torch.Tensor, multimask_output: bool, output_attentions: Optional[bool] = None, - attention_similarity: torch.Tensor = None, - target_embedding: torch.Tensor = None, + attention_similarity: Optional[torch.Tensor] = None, + target_embedding: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """ Predict masks given image and prompt embeddings. diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py index d0462499c5..cbe06e0542 100644 --- a/src/transformers/models/sam/modeling_tf_sam.py +++ b/src/transformers/models/sam/modeling_tf_sam.py @@ -71,7 +71,7 @@ class TFSamVisionEncoderOutput(ModelOutput): """ image_embeds: tf.Tensor | None = None - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -105,8 +105,8 @@ class TFSamImageSegmentationOutput(ModelOutput): heads. """ - iou_scores: tf.Tensor = None - pred_masks: tf.Tensor = None + iou_scores: Optional[tf.Tensor] = None + pred_masks: Optional[tf.Tensor] = None vision_hidden_states: Tuple[tf.Tensor, ...] | None = None vision_attentions: Tuple[tf.Tensor, ...] | None = None mask_decoder_attentions: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py index b4ebc88b0e..fa7c75844f 100755 --- a/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/modeling_seamless_m4t.py @@ -1050,7 +1050,10 @@ class SeamlessM4TSinusoidalPositionalEmbedding(nn.Module): @torch.no_grad() def forward( - self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0 + self, + input_ids: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, + past_key_values_length: int = 0, ): if input_ids is not None: bsz, seq_len = input_ids.size() @@ -1685,7 +1688,7 @@ class SeamlessM4TEncoder(SeamlessM4TPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, output_attentions: Optional[bool] = None, @@ -1873,7 +1876,7 @@ class SeamlessM4TDecoder(SeamlessM4TPreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2203,7 +2206,7 @@ class SeamlessM4TTextToUnitForConditionalGeneration(SeamlessM4TPreTrainedModel, @add_start_docstrings_to_model_forward(M4T_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2697,7 +2700,7 @@ class SeamlessM4TForTextToText(SeamlessM4TPreTrainedModel, GenerationMixin): @add_start_docstrings_to_model_forward(M4T_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2958,7 +2961,7 @@ class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel, GenerationMixin): @add_start_docstrings_to_model_forward(M4T_SPEECH_INPUTS_DOCSTRING) def forward( self, - input_features: torch.LongTensor = None, + input_features: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -3236,7 +3239,7 @@ class SeamlessM4TForTextToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin): @add_start_docstrings_to_model_forward(M4T_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -3561,7 +3564,7 @@ class SeamlessM4TForSpeechToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin): @add_start_docstrings_to_model_forward(M4T_SPEECH_INPUTS_DOCSTRING) def forward( self, - input_features: torch.LongTensor = None, + input_features: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py index 7a36633db4..998008bbf5 100644 --- a/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +++ b/src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py @@ -104,7 +104,7 @@ class SeamlessM4Tv2TextToUnitDecoderOutput(ModelOutput): for *masked* """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None padding_mask: Optional[torch.Tensor] = None @@ -153,7 +153,7 @@ class SeamlessM4Tv2TextToUnitOutput(ModelOutput): Language modeling loss. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None padding_mask: Optional[torch.Tensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -999,7 +999,10 @@ class SeamlessM4Tv2SinusoidalPositionalEmbedding(nn.Module): @torch.no_grad() def forward( - self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0 + self, + input_ids: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.Tensor] = None, + past_key_values_length: int = 0, ): if input_ids is not None: bsz, seq_len = input_ids.size() @@ -1799,7 +1802,7 @@ class SeamlessM4Tv2Encoder(SeamlessM4Tv2PreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, output_attentions: Optional[bool] = None, @@ -1988,7 +1991,7 @@ class SeamlessM4Tv2Decoder(SeamlessM4Tv2PreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.LongTensor] = None, @@ -2248,9 +2251,9 @@ class SeamlessM4Tv2TextToUnitDecoder(SeamlessM4Tv2PreTrainedModel): def forward( self, - char_input_ids: torch.LongTensor = None, - char_count_per_id: torch.LongTensor = None, - encoder_hidden_states: torch.FloatTensor = None, + char_input_ids: Optional[torch.LongTensor] = None, + char_count_per_id: Optional[torch.LongTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, @@ -2380,8 +2383,8 @@ class SeamlessM4Tv2TextToUnitModel(SeamlessM4Tv2PreTrainedModel): def forward( self, input_ids: Optional[torch.LongTensor] = None, - char_input_ids: torch.LongTensor = None, - char_count_per_id: torch.LongTensor = None, + char_input_ids: Optional[torch.LongTensor] = None, + char_count_per_id: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -2499,9 +2502,9 @@ class SeamlessM4Tv2TextToUnitForConditionalGeneration(SeamlessM4Tv2PreTrainedMod @add_start_docstrings_to_model_forward(M4T_TEXT_TO_UNITS_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, - char_input_ids: torch.LongTensor = None, - char_count_per_id: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, + char_input_ids: Optional[torch.LongTensor] = None, + char_count_per_id: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, @@ -2660,7 +2663,7 @@ class SeamlessM4Tv2VariancePredictor(nn.Module): self.ln2 = nn.LayerNorm(hidden_dim) self.proj = nn.Linear(hidden_dim, 1) - def forward(self, hidden_states: Tensor, padding_mask: Tensor = None) -> Tensor: + def forward(self, hidden_states: Tensor, padding_mask: Optional[Tensor] = None) -> Tensor: # Input: B x T x C; Output: B x T if padding_mask is not None: hidden_states = hidden_states.masked_fill(~padding_mask.bool().unsqueeze(-1), 0.0) @@ -2977,7 +2980,7 @@ class SeamlessM4Tv2ForTextToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin): @add_start_docstrings_to_model_forward(M4T_TEXT_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -3247,7 +3250,7 @@ class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin # Copied from transformers.models.seamless_m4t.modeling_seamless_m4t.SeamlessM4TForSpeechToText.forward def forward( self, - input_features: torch.LongTensor = None, + input_features: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -3536,7 +3539,7 @@ class SeamlessM4Tv2ForTextToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMixin # Copied from transformers.models.seamless_m4t.modeling_seamless_m4t.SeamlessM4TForTextToSpeech.forward with SeamlessM4T->SeamlessM4Tv2 def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, @@ -3902,7 +3905,7 @@ class SeamlessM4Tv2ForSpeechToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMix # Copied from transformers.models.seamless_m4t.modeling_seamless_m4t.SeamlessM4TForSpeechToSpeech.forward with SeamlessM4T->SeamlessM4Tv2 def forward( self, - input_features: torch.LongTensor = None, + input_features: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 51b4cd27ca..b978f70165 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -219,12 +219,12 @@ class SegformerImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -260,8 +260,8 @@ class SegformerImageProcessor(BaseImageProcessor): def _preprocess_mask( self, segmentation_map: ImageInput, - do_reduce_labels: bool = None, - do_resize: bool = None, + do_reduce_labels: Optional[bool] = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/segformer/modeling_segformer.py b/src/transformers/models/segformer/modeling_segformer.py index 3e303b1bff..84a90e5e62 100755 --- a/src/transformers/models/segformer/modeling_segformer.py +++ b/src/transformers/models/segformer/modeling_segformer.py @@ -73,7 +73,7 @@ class SegFormerImageClassifierOutput(ImageClassifierOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/shieldgemma2/modeling_shieldgemma2.py b/src/transformers/models/shieldgemma2/modeling_shieldgemma2.py index 1826e64ff0..3981a1f54d 100644 --- a/src/transformers/models/shieldgemma2/modeling_shieldgemma2.py +++ b/src/transformers/models/shieldgemma2/modeling_shieldgemma2.py @@ -117,7 +117,7 @@ class ShieldGemma2ImageClassifierOutputWithNoAttention(ImageClassifierOutputWith Args: """ - probabilities: torch.Tensor = None + probabilities: Optional[torch.Tensor] = None class ShieldGemma2ForImageClassification(PreTrainedModel): @@ -154,8 +154,8 @@ class ShieldGemma2ForImageClassification(PreTrainedModel): @add_start_docstrings_to_model_forward(SHIELDGEMMA2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None, diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index d582687806..7ec6c36d39 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -89,7 +89,7 @@ class SiglipImageProcessor(BaseImageProcessor): do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -111,18 +111,18 @@ class SiglipImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, ) -> PIL.Image.Image: """ Preprocess an image or batch of images. diff --git a/src/transformers/models/siglip/modeling_siglip.py b/src/transformers/models/siglip/modeling_siglip.py index f60feb2ea8..6c488262a5 100644 --- a/src/transformers/models/siglip/modeling_siglip.py +++ b/src/transformers/models/siglip/modeling_siglip.py @@ -169,7 +169,7 @@ class SiglipVisionModelOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -199,7 +199,7 @@ class SiglipTextModelOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -228,10 +228,10 @@ class SiglipOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/siglip2/modeling_siglip2.py b/src/transformers/models/siglip2/modeling_siglip2.py index 15d9cffe7d..b478743949 100644 --- a/src/transformers/models/siglip2/modeling_siglip2.py +++ b/src/transformers/models/siglip2/modeling_siglip2.py @@ -75,7 +75,7 @@ class Siglip2VisionOutput(ModelOutput): """ image_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -104,7 +104,7 @@ class Siglip2TextOutput(ModelOutput): """ text_embeds: Optional[torch.FloatTensor] = None - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -132,10 +132,10 @@ class Siglip2Output(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_image: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - image_embeds: torch.FloatTensor = None + logits_per_image: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + image_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/smolvlm/modeling_smolvlm.py b/src/transformers/models/smolvlm/modeling_smolvlm.py index f9cad6fa20..e88e23776b 100644 --- a/src/transformers/models/smolvlm/modeling_smolvlm.py +++ b/src/transformers/models/smolvlm/modeling_smolvlm.py @@ -537,7 +537,7 @@ class SmolVLMBaseModelOutputWithPast(ModelOutput): image_hidden_states of the model produced by the vision encoder """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -764,7 +764,7 @@ class SmolVLMModel(SmolVLMPreTrainedModel): ) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -924,7 +924,7 @@ class SmolVLMCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -987,7 +987,7 @@ class SmolVLMForConditionalGeneration(SmolVLMPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=SmolVLMCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/smolvlm/modular_smolvlm.py b/src/transformers/models/smolvlm/modular_smolvlm.py index e4df055442..051bdfaf5a 100644 --- a/src/transformers/models/smolvlm/modular_smolvlm.py +++ b/src/transformers/models/smolvlm/modular_smolvlm.py @@ -184,7 +184,7 @@ class SmolVLMModel(Idefics3Model): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/splinter/modeling_splinter.py b/src/transformers/models/splinter/modeling_splinter.py index 295a427e6a..174e766598 100755 --- a/src/transformers/models/splinter/modeling_splinter.py +++ b/src/transformers/models/splinter/modeling_splinter.py @@ -964,8 +964,8 @@ class SplinterForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 8d2b3f96ec..b3c3e2dde9 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -811,7 +811,7 @@ class StableLmModel(StableLmPreTrainedModel): @add_start_docstrings_to_model_forward(STABLELM_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, @@ -1105,7 +1105,7 @@ class StableLmForCausalLM(StableLmPreTrainedModel, GenerationMixin): # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index 6fbb652c99..e55bba9f23 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -492,7 +492,7 @@ class Starcoder2Model(Starcoder2PreTrainedModel): @add_start_docstrings_to_model_forward(STARCODER2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, @@ -777,7 +777,7 @@ class Starcoder2ForCausalLM(Starcoder2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Cache] = None, diff --git a/src/transformers/models/starcoder2/modular_starcoder2.py b/src/transformers/models/starcoder2/modular_starcoder2.py index d6aaa08f2c..1aaf789a50 100644 --- a/src/transformers/models/starcoder2/modular_starcoder2.py +++ b/src/transformers/models/starcoder2/modular_starcoder2.py @@ -159,7 +159,7 @@ class Starcoder2Model(MistralModel): @add_start_docstrings_to_model_forward(STARCODER2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, diff --git a/src/transformers/models/superglue/image_processing_superglue.py b/src/transformers/models/superglue/image_processing_superglue.py index 567e555807..b84bfc280c 100644 --- a/src/transformers/models/superglue/image_processing_superglue.py +++ b/src/transformers/models/superglue/image_processing_superglue.py @@ -220,12 +220,12 @@ class SuperGlueImageProcessor(BaseImageProcessor): def preprocess( self, images, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_grayscale: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_grayscale: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index cfae68f902..e1ad94613f 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -180,11 +180,11 @@ class SuperPointImageProcessor(BaseImageProcessor): def preprocess( self, images, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_grayscale: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_grayscale: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 46dff663d1..5de428831e 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -86,7 +86,7 @@ class SwinEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -121,7 +121,7 @@ class SwinModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -158,7 +158,7 @@ class SwinMaskedImageModelingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - reconstruction: torch.FloatTensor = None + reconstruction: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -203,7 +203,7 @@ class SwinImageClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index 865444f081..c1c4fb1620 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -91,7 +91,7 @@ class TFSwinEncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @@ -126,7 +126,7 @@ class TFSwinModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None pooler_output: tf.Tensor | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -163,7 +163,7 @@ class TFSwinMaskedImageModelingOutput(ModelOutput): """ loss: tf.Tensor | None = None - reconstruction: tf.Tensor = None + reconstruction: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @@ -208,7 +208,7 @@ class TFSwinImageClassifierOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @@ -307,7 +307,7 @@ class TFSwinEmbeddings(keras.layers.Layer): self.dropout.build(None) def call( - self, pixel_values: tf.Tensor, bool_masked_pos: bool = None, training: bool = False + self, pixel_values: tf.Tensor, bool_masked_pos: Optional[bool] = None, training: bool = False ) -> Tuple[tf.Tensor, Tuple[int, int]]: embeddings, output_dimensions = self.patch_embeddings(pixel_values, training=training) embeddings = self.norm(embeddings, training=training) @@ -474,7 +474,7 @@ class TFSwinPatchMerging(keras.layers.Layer): class TFSwinDropPath(keras.layers.Layer): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" - def __init__(self, drop_prob: float = None, scale_by_keep: bool = True, **kwargs) -> None: + def __init__(self, drop_prob: Optional[float] = None, scale_by_keep: bool = True, **kwargs) -> None: super(TFSwinDropPath, self).__init__(**kwargs) self.drop_prob = drop_prob self.scale_by_keep = scale_by_keep diff --git a/src/transformers/models/swin2sr/modeling_swin2sr.py b/src/transformers/models/swin2sr/modeling_swin2sr.py index 784367a014..a3ae0ed588 100644 --- a/src/transformers/models/swin2sr/modeling_swin2sr.py +++ b/src/transformers/models/swin2sr/modeling_swin2sr.py @@ -69,7 +69,7 @@ class Swin2SREncoderOutput(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py index 46e0a1ca9a..4a5d1bd988 100644 --- a/src/transformers/models/swinv2/modeling_swinv2.py +++ b/src/transformers/models/swinv2/modeling_swinv2.py @@ -87,7 +87,7 @@ class Swinv2EncoderOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -123,7 +123,7 @@ class Swinv2ModelOutput(ModelOutput): include the spatial dimensions. """ - last_hidden_state: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None pooler_output: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -161,7 +161,7 @@ class Swinv2MaskedImageModelingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - reconstruction: torch.FloatTensor = None + reconstruction: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -207,7 +207,7 @@ class Swinv2ImageClassifierOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index 39c8101f92..306944bae1 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -2135,7 +2135,7 @@ class T5ForSequenceClassification(T5PreTrainedModel): @replace_return_docstrings(output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/table_transformer/modeling_table_transformer.py b/src/transformers/models/table_transformer/modeling_table_transformer.py index cf6c6d4daf..25246f3382 100644 --- a/src/transformers/models/table_transformer/modeling_table_transformer.py +++ b/src/transformers/models/table_transformer/modeling_table_transformer.py @@ -172,8 +172,8 @@ class TableTransformerObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -572,7 +572,7 @@ class TableTransformerEncoderLayer(nn.Module): self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, - object_queries: torch.Tensor = None, + object_queries: Optional[torch.Tensor] = None, output_attentions: bool = False, ): """ diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index 95b097013e..048e3fb009 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -77,8 +77,8 @@ class TableQuestionAnsweringOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - logits_aggregation: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + logits_aggregation: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 82430deebf..7e5abdd7fa 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -111,7 +111,7 @@ class TFTableQuestionAnsweringOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None logits_aggregation: tf.Tensor | None = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -170,10 +170,10 @@ class TFTapasEmbeddings(keras.layers.Layer): def call( self, - input_ids: tf.Tensor = None, - position_ids: tf.Tensor = None, - token_type_ids: tf.Tensor = None, - inputs_embeds: tf.Tensor = None, + input_ids: Optional[tf.Tensor] = None, + position_ids: Optional[tf.Tensor] = None, + token_type_ids: Optional[tf.Tensor] = None, + inputs_embeds: Optional[tf.Tensor] = None, training: bool = False, ) -> tf.Tensor: """ diff --git a/src/transformers/models/textnet/image_processing_textnet.py b/src/transformers/models/textnet/image_processing_textnet.py index 1f56d60449..74806a0556 100644 --- a/src/transformers/models/textnet/image_processing_textnet.py +++ b/src/transformers/models/textnet/image_processing_textnet.py @@ -203,18 +203,18 @@ class TextNetImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py index 9a87d19d16..d63be6d8d7 100644 --- a/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py +++ b/src/transformers/models/time_series_transformer/modeling_time_series_transformer.py @@ -180,7 +180,7 @@ class TimeSeriesNOPScaler(nn.Module): self.keepdim = config.keepdim if hasattr(config, "keepdim") else True def forward( - self, data: torch.Tensor, observed_indicator: torch.Tensor = None + self, data: torch.Tensor, observed_indicator: Optional[torch.Tensor] = None ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Parameters: diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index 115433c6c4..85b1202062 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -266,19 +266,19 @@ class TvpImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, do_pad: bool = True, pad_size: Dict[str, int] = None, constant_values: Union[float, Iterable[float]] = None, pad_mode: PaddingMode = None, - do_normalize: bool = None, - do_flip_channel_order: bool = None, + do_normalize: Optional[bool] = None, + do_flip_channel_order: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, @@ -340,19 +340,19 @@ class TvpImageProcessor(BaseImageProcessor): def preprocess( self, videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]], - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_pad: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_pad: Optional[bool] = None, pad_size: Dict[str, int] = None, constant_values: Union[float, Iterable[float]] = None, pad_mode: PaddingMode = None, - do_normalize: bool = None, - do_flip_channel_order: bool = None, + do_normalize: Optional[bool] = None, + do_flip_channel_order: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py index 16dd5c0a37..9e93e6e900 100644 --- a/src/transformers/models/tvp/modeling_tvp.py +++ b/src/transformers/models/tvp/modeling_tvp.py @@ -54,7 +54,7 @@ class TvpVideoGroundingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index 8238cd38a9..4f355c9ed5 100644 --- a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -261,8 +261,8 @@ class BaseModelOutputWithAttentionMask(ModelOutput): used to compute the weighted average in the cross-attention heads. """ - last_hidden_state: torch.FloatTensor = None - attention_mask: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + attention_mask: Optional[torch.FloatTensor] = None past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -1719,8 +1719,8 @@ class UdopModel(UdopPreTrainedModel): @replace_return_docstrings(output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: Tensor = None, - attention_mask: Tensor = None, + input_ids: Optional[Tensor] = None, + attention_mask: Optional[Tensor] = None, bbox: Dict[str, Any] = None, pixel_values: Optional[Tensor] = None, visual_bbox: Dict[str, Any] = None, @@ -1895,8 +1895,8 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: Tensor = None, - attention_mask: Tensor = None, + input_ids: Optional[Tensor] = None, + attention_mask: Optional[Tensor] = None, bbox: Dict[str, Any] = None, pixel_values: Optional[Tensor] = None, visual_bbox: Dict[str, Any] = None, @@ -2108,9 +2108,9 @@ class UdopEncoderModel(UdopPreTrainedModel): @replace_return_docstrings(output_type=BaseModelOutputWithAttentionMask, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: Tensor = None, + input_ids: Optional[Tensor] = None, bbox: Dict[str, Any] = None, - attention_mask: Tensor = None, + attention_mask: Optional[Tensor] = None, pixel_values: Optional[Tensor] = None, visual_bbox: Dict[str, Any] = None, head_mask: Optional[Tensor] = None, diff --git a/src/transformers/models/udop/tokenization_udop.py b/src/transformers/models/udop/tokenization_udop.py index c3d1b4a5c4..86ee6a873d 100644 --- a/src/transformers/models/udop/tokenization_udop.py +++ b/src/transformers/models/udop/tokenization_udop.py @@ -697,7 +697,7 @@ class UdopTokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -890,7 +890,7 @@ class UdopTokenizer(PreTrainedTokenizer): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -944,7 +944,7 @@ class UdopTokenizer(PreTrainedTokenizer): def _batch_prepare_for_model_boxes( self, batch_text_or_text_pairs, - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[int]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/udop/tokenization_udop_fast.py b/src/transformers/models/udop/tokenization_udop_fast.py index 2e129bad9a..9992da7bdd 100644 --- a/src/transformers/models/udop/tokenization_udop_fast.py +++ b/src/transformers/models/udop/tokenization_udop_fast.py @@ -446,7 +446,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, @@ -524,7 +524,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): List[TextInputPair], List[PreTokenizedInput], ], - is_pair: bool = None, + is_pair: Optional[bool] = None, boxes: Optional[List[List[List[int]]]] = None, word_labels: Optional[List[List[int]]] = None, add_special_tokens: bool = True, diff --git a/src/transformers/models/umt5/modeling_umt5.py b/src/transformers/models/umt5/modeling_umt5.py index 8c22500a7c..a25a99653a 100644 --- a/src/transformers/models/umt5/modeling_umt5.py +++ b/src/transformers/models/umt5/modeling_umt5.py @@ -1658,7 +1658,7 @@ class UMT5ForSequenceClassification(UMT5PreTrainedModel): @replace_return_docstrings(output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index ef81069eae..2ee2e054ae 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -92,9 +92,9 @@ class UniSpeechForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - projected_states: torch.FloatTensor = None - projected_quantized_states: torch.FloatTensor = None - codevector_perplexity: torch.FloatTensor = None + projected_states: Optional[torch.FloatTensor] = None + projected_quantized_states: Optional[torch.FloatTensor] = None + codevector_perplexity: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index fb8edc3938..0eb035bac1 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -108,10 +108,10 @@ class UniSpeechSatForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - projected_states: torch.FloatTensor = None - projected_quantized_states: torch.FloatTensor = None - codevector_perplexity: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + projected_states: Optional[torch.FloatTensor] = None + projected_quantized_states: Optional[torch.FloatTensor] = None + codevector_perplexity: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/univnet/modeling_univnet.py b/src/transformers/models/univnet/modeling_univnet.py index 9cfd45e188..3c73625592 100644 --- a/src/transformers/models/univnet/modeling_univnet.py +++ b/src/transformers/models/univnet/modeling_univnet.py @@ -46,8 +46,8 @@ class UnivNetModelOutput(ModelOutput): The batched length in samples of each unpadded waveform in `waveforms`. """ - waveforms: torch.FloatTensor = None - waveform_lengths: torch.FloatTensor = None + waveforms: Optional[torch.FloatTensor] = None + waveform_lengths: Optional[torch.FloatTensor] = None class UnivNetKernelPredictorResidualBlock(nn.Module): diff --git a/src/transformers/models/video_llava/image_processing_video_llava.py b/src/transformers/models/video_llava/image_processing_video_llava.py index c572551579..8b7f489791 100644 --- a/src/transformers/models/video_llava/image_processing_video_llava.py +++ b/src/transformers/models/video_llava/image_processing_video_llava.py @@ -174,17 +174,17 @@ class VideoLlavaImageProcessor(BaseImageProcessor): self, images: List[ImageInput] = None, videos: List[VideoInput] = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -331,9 +331,9 @@ class VideoLlavaImageProcessor(BaseImageProcessor): do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/video_llava/modeling_video_llava.py b/src/transformers/models/video_llava/modeling_video_llava.py index 19170049b6..24aaee0351 100644 --- a/src/transformers/models/video_llava/modeling_video_llava.py +++ b/src/transformers/models/video_llava/modeling_video_llava.py @@ -78,7 +78,7 @@ class VideoLlavaCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -364,9 +364,9 @@ class VideoLlavaForConditionalGeneration(VideoLlavaPreTrainedModel, GenerationMi @replace_return_docstrings(output_type=VideoLlavaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, - pixel_values_images: torch.FloatTensor = None, - pixel_values_videos: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values_images: Optional[torch.FloatTensor] = None, + pixel_values_videos: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index afba947bbd..eac4759af3 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -178,14 +178,14 @@ class VideoMAEImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, @@ -236,14 +236,14 @@ class VideoMAEImageProcessor(BaseImageProcessor): def preprocess( self, videos: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/videomae/modeling_videomae.py b/src/transformers/models/videomae/modeling_videomae.py index d192e4f8eb..077df5bcf4 100755 --- a/src/transformers/models/videomae/modeling_videomae.py +++ b/src/transformers/models/videomae/modeling_videomae.py @@ -64,7 +64,7 @@ class VideoMAEDecoderOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -90,7 +90,7 @@ class VideoMAEForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py index 07ed544d04..00dc6f5ce7 100755 --- a/src/transformers/models/vilt/modeling_vilt.py +++ b/src/transformers/models/vilt/modeling_vilt.py @@ -70,7 +70,7 @@ class ViltForImagesAndTextClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[List[Tuple[torch.FloatTensor]]] = None attentions: Optional[List[Tuple[torch.FloatTensor]]] = None diff --git a/src/transformers/models/vipllava/modeling_vipllava.py b/src/transformers/models/vipllava/modeling_vipllava.py index 6216ef88da..3c706e43d5 100644 --- a/src/transformers/models/vipllava/modeling_vipllava.py +++ b/src/transformers/models/vipllava/modeling_vipllava.py @@ -76,7 +76,7 @@ class VipLlavaCausalLMOutputWithPast(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None past_key_values: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -306,8 +306,8 @@ class VipLlavaForConditionalGeneration(VipLlavaPreTrainedModel, GenerationMixin) # Ignore copy def forward( self, - input_ids: torch.LongTensor = None, - pixel_values: torch.FloatTensor = None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, diff --git a/src/transformers/models/visual_bert/modeling_visual_bert.py b/src/transformers/models/visual_bert/modeling_visual_bert.py index d9ce821101..7db7715f52 100755 --- a/src/transformers/models/visual_bert/modeling_visual_bert.py +++ b/src/transformers/models/visual_bert/modeling_visual_bert.py @@ -559,8 +559,8 @@ class VisualBertForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - prediction_logits: torch.FloatTensor = None - seq_relationship_logits: torch.FloatTensor = None + prediction_logits: Optional[torch.FloatTensor] = None + seq_relationship_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index bc5a14f420..8879a8665f 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -74,9 +74,9 @@ class TFViTMAEModelOutput(ModelOutput): the self-attention heads. """ - last_hidden_state: tf.Tensor = None - mask: tf.Tensor = None - ids_restore: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + mask: Optional[tf.Tensor] = None + ids_restore: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -99,7 +99,7 @@ class TFViTMAEDecoderOutput(ModelOutput): the self-attention heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -129,9 +129,9 @@ class TFViTMAEForPreTrainingOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None - mask: tf.Tensor = None - ids_restore: tf.Tensor = None + logits: Optional[tf.Tensor] = None + mask: Optional[tf.Tensor] = None + ids_restore: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None @@ -314,7 +314,7 @@ class TFViTMAEEmbeddings(keras.layers.Layer): return sequence_unmasked, mask, ids_restore def call( - self, pixel_values: tf.Tensor, noise: tf.Tensor = None, interpolate_pos_encoding: bool = False + self, pixel_values: tf.Tensor, noise: Optional[tf.Tensor] = None, interpolate_pos_encoding: bool = False ) -> tf.Tensor: batch_size, num_channels, height, width = shape_list(pixel_values) embeddings = self.patch_embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding) @@ -775,7 +775,7 @@ class TFViTMAEMainLayer(keras.layers.Layer): def call( self, pixel_values: TFModelInputType | None = None, - noise: tf.Tensor = None, + noise: Optional[tf.Tensor] = None, head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -943,7 +943,7 @@ class TFViTMAEModel(TFViTMAEPreTrainedModel): def call( self, pixel_values: TFModelInputType | None = None, - noise: tf.Tensor = None, + noise: Optional[tf.Tensor] = None, head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1294,7 +1294,7 @@ class TFViTMAEForPreTraining(TFViTMAEPreTrainedModel): def call( self, pixel_values: TFModelInputType | None = None, - noise: tf.Tensor = None, + noise: Optional[tf.Tensor] = None, head_mask: np.ndarray | tf.Tensor | None = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/vit_mae/modeling_vit_mae.py b/src/transformers/models/vit_mae/modeling_vit_mae.py index e4f6a868ac..4636519ee6 100755 --- a/src/transformers/models/vit_mae/modeling_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_vit_mae.py @@ -67,9 +67,9 @@ class ViTMAEModelOutput(ModelOutput): the self-attention heads. """ - last_hidden_state: torch.FloatTensor = None - mask: torch.LongTensor = None - ids_restore: torch.LongTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + mask: Optional[torch.LongTensor] = None + ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -92,7 +92,7 @@ class ViTMAEDecoderOutput(ModelOutput): the self-attention heads. """ - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -122,9 +122,9 @@ class ViTMAEForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None - mask: torch.LongTensor = None - ids_restore: torch.LongTensor = None + logits: Optional[torch.FloatTensor] = None + mask: Optional[torch.LongTensor] = None + ids_restore: Optional[torch.LongTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/vitmatte/modeling_vitmatte.py b/src/transformers/models/vitmatte/modeling_vitmatte.py index b27bc28870..aa3aa5b883 100644 --- a/src/transformers/models/vitmatte/modeling_vitmatte.py +++ b/src/transformers/models/vitmatte/modeling_vitmatte.py @@ -58,7 +58,7 @@ class ImageMattingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - alphas: torch.FloatTensor = None + alphas: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/vitpose/image_processing_vitpose.py b/src/transformers/models/vitpose/image_processing_vitpose.py index e7c5c524cb..387b722547 100644 --- a/src/transformers/models/vitpose/image_processing_vitpose.py +++ b/src/transformers/models/vitpose/image_processing_vitpose.py @@ -424,11 +424,11 @@ class VitPoseImageProcessor(BaseImageProcessor): self, images: ImageInput, boxes: Union[List[List[float]], np.ndarray], - do_affine_transform: bool = None, + do_affine_transform: Optional[bool] = None, size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -599,7 +599,7 @@ class VitPoseImageProcessor(BaseImageProcessor): outputs: "VitPoseEstimatorOutput", boxes: Union[List[List[List[float]]], np.ndarray], kernel_size: int = 11, - threshold: float = None, + threshold: Optional[float] = None, target_sizes: Union[TensorType, List[Tuple]] = None, ): """ diff --git a/src/transformers/models/vitpose/modeling_vitpose.py b/src/transformers/models/vitpose/modeling_vitpose.py index b5dd274654..dfe9738abf 100644 --- a/src/transformers/models/vitpose/modeling_vitpose.py +++ b/src/transformers/models/vitpose/modeling_vitpose.py @@ -62,7 +62,7 @@ class VitPoseEstimatorOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - heatmaps: torch.FloatTensor = None + heatmaps: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/vits/modeling_vits.py b/src/transformers/models/vits/modeling_vits.py index 7a506d497f..59483d3e61 100644 --- a/src/transformers/models/vits/modeling_vits.py +++ b/src/transformers/models/vits/modeling_vits.py @@ -69,8 +69,8 @@ class VitsModelOutput(ModelOutput): heads. """ - waveform: torch.FloatTensor = None - sequence_lengths: torch.FloatTensor = None + waveform: Optional[torch.FloatTensor] = None + sequence_lengths: Optional[torch.FloatTensor] = None spectrogram: Optional[Tuple[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None @@ -101,9 +101,9 @@ class VitsTextEncoderOutput(ModelOutput): heads. """ - last_hidden_state: torch.FloatTensor = None - prior_means: torch.FloatTensor = None - prior_log_variances: torch.FloatTensor = None + last_hidden_state: Optional[torch.FloatTensor] = None + prior_means: Optional[torch.FloatTensor] = None + prior_log_variances: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 1343d97a91..8b369be41b 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -227,15 +227,15 @@ class VivitImageProcessor(BaseImageProcessor): def _preprocess_image( self, image: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - offset: bool = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + offset: Optional[bool] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, @@ -290,15 +290,15 @@ class VivitImageProcessor(BaseImageProcessor): def preprocess( self, videos: ImageInput, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Dict[str, int] = None, resample: PILImageResampling = None, - do_center_crop: bool = None, + do_center_crop: Optional[bool] = None, crop_size: Dict[str, int] = None, - do_rescale: bool = None, - rescale_factor: float = None, - offset: bool = None, - do_normalize: bool = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + offset: Optional[bool] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index 1cfbeb43a5..ad923bef80 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -78,8 +78,8 @@ class TFWav2Vec2BaseModelOutput(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None - extract_features: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None + extract_features: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor] | None = None attentions: Tuple[tf.Tensor] | None = None diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 4537621f2d..93f7737253 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -126,9 +126,9 @@ class Wav2Vec2ForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - projected_states: torch.FloatTensor = None - projected_quantized_states: torch.FloatTensor = None - codevector_perplexity: torch.FloatTensor = None + projected_states: Optional[torch.FloatTensor] = None + projected_quantized_states: Optional[torch.FloatTensor] = None + codevector_perplexity: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None contrastive_loss: Optional[torch.FloatTensor] = None diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index 9e9048ee03..ad51a4e4d0 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -404,7 +404,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, group_tokens: bool = True, spaces_between_special_tokens: bool = False, output_word_offsets: Optional[bool] = False, @@ -459,7 +459,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): self, sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_char_offsets: bool = False, output_word_offsets: bool = False, **kwargs, @@ -529,7 +529,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer): self, token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_char_offsets: bool = False, output_word_offsets: bool = False, **kwargs, @@ -876,7 +876,7 @@ class Wav2Vec2Tokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, **kwargs, ) -> str: """ diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py index 4dde34c73c..587ae1c824 100644 --- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py @@ -100,9 +100,9 @@ class Wav2Vec2ConformerForPreTrainingOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - projected_states: torch.FloatTensor = None - projected_quantized_states: torch.FloatTensor = None - codevector_perplexity: torch.FloatTensor = None + projected_states: Optional[torch.FloatTensor] = None + projected_quantized_states: Optional[torch.FloatTensor] = None + codevector_perplexity: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None contrastive_loss: Optional[torch.FloatTensor] = None diff --git a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py index b617b17d02..574de200fa 100644 --- a/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +++ b/src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py @@ -408,7 +408,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, group_tokens: bool = True, filter_word_delimiter_token: bool = True, spaces_between_special_tokens: bool = False, @@ -455,7 +455,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): self, token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_char_offsets: bool = False, **kwargs, ) -> str: @@ -511,7 +511,7 @@ class Wav2Vec2PhonemeCTCTokenizer(PreTrainedTokenizer): self, sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_char_offsets: bool = False, **kwargs, ) -> List[str]: diff --git a/src/transformers/models/whisper/modeling_whisper.py b/src/transformers/models/whisper/modeling_whisper.py index e265e15c38..42fb928b7a 100644 --- a/src/transformers/models/whisper/modeling_whisper.py +++ b/src/transformers/models/whisper/modeling_whisper.py @@ -1879,7 +1879,7 @@ class WhisperForCausalLM(WhisperPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None, head_mask: Optional[torch.Tensor] = None, diff --git a/src/transformers/models/whisper/tokenization_whisper.py b/src/transformers/models/whisper/tokenization_whisper.py index b5dbb49a36..9e4c89b591 100644 --- a/src/transformers/models/whisper/tokenization_whisper.py +++ b/src/transformers/models/whisper/tokenization_whisper.py @@ -378,7 +378,7 @@ class WhisperTokenizer(PreTrainedTokenizer): return word def set_prefix_tokens( - self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None + self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: Optional[bool] = None ): """ Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to @@ -676,7 +676,7 @@ class WhisperTokenizer(PreTrainedTokenizer): self, token_ids, skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_offsets: bool = False, time_precision: float = 0.02, decode_with_timestamps: bool = False, diff --git a/src/transformers/models/whisper/tokenization_whisper_fast.py b/src/transformers/models/whisper/tokenization_whisper_fast.py index 5b25def5e4..64dcc186bf 100644 --- a/src/transformers/models/whisper/tokenization_whisper_fast.py +++ b/src/transformers/models/whisper/tokenization_whisper_fast.py @@ -313,7 +313,7 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast): self, token_ids, skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, output_offsets: bool = False, time_precision: float = 0.02, decode_with_timestamps: bool = False, @@ -452,7 +452,7 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast): return tuple(files) + (normalizer_file,) def set_prefix_tokens( - self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: bool = None + self, language: Optional[str] = None, task: Optional[str] = None, predict_timestamps: Optional[bool] = None ): """ Override the prefix tokens appended to the start of the label sequence. This method can be used standalone to diff --git a/src/transformers/models/x_clip/modeling_x_clip.py b/src/transformers/models/x_clip/modeling_x_clip.py index 2f27e19dd5..f85b4636cd 100644 --- a/src/transformers/models/x_clip/modeling_x_clip.py +++ b/src/transformers/models/x_clip/modeling_x_clip.py @@ -81,10 +81,10 @@ class XCLIPOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits_per_video: torch.FloatTensor = None - logits_per_text: torch.FloatTensor = None - text_embeds: torch.FloatTensor = None - video_embeds: torch.FloatTensor = None + logits_per_video: Optional[torch.FloatTensor] = None + logits_per_text: Optional[torch.FloatTensor] = None + text_embeds: Optional[torch.FloatTensor] = None + video_embeds: Optional[torch.FloatTensor] = None text_model_output: BaseModelOutputWithPooling = None vision_model_output: BaseModelOutputWithPooling = None mit_output: BaseModelOutputWithPooling = None diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index 9d1adf7370..feb91b46f3 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -177,7 +177,7 @@ class XGLMSinusoidalPositionalEmbedding(nn.Module): return emb.to(torch.get_default_dtype()) @torch.no_grad() - def forward(self, position_ids: torch.Tensor = None, past_key_values_length: int = 0): + def forward(self, position_ids: Optional[torch.Tensor] = None, past_key_values_length: int = 0): bsz, seq_len = position_ids.size() position_ids += self.offset diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index 87f4dbca17..d161778c07 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -599,7 +599,7 @@ class TFXLMWithLMHeadModelOutput(ModelOutput): heads. """ - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 83e097ebc8..2383352ae6 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -863,7 +863,7 @@ class TFXLNetModelOutput(ModelOutput): heads. """ - last_hidden_state: tf.Tensor = None + last_hidden_state: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -900,7 +900,7 @@ class TFXLNetLMHeadModelOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -934,7 +934,7 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -968,7 +968,7 @@ class TFXLNetForTokenClassificationOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -1004,7 +1004,7 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput): """ loss: tf.Tensor | None = None - logits: tf.Tensor = None + logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None @@ -1040,8 +1040,8 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput): """ loss: tf.Tensor | None = None - start_logits: tf.Tensor = None - end_logits: tf.Tensor = None + start_logits: Optional[tf.Tensor] = None + end_logits: Optional[tf.Tensor] = None mems: List[tf.Tensor] | None = None hidden_states: Tuple[tf.Tensor, ...] | None = None attentions: Tuple[tf.Tensor, ...] | None = None diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index f689e417bf..db0703f949 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -636,7 +636,7 @@ class XLNetLMHeadModelOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -670,7 +670,7 @@ class XLNetForSequenceClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -704,7 +704,7 @@ class XLNetForTokenClassificationOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -740,7 +740,7 @@ class XLNetForMultipleChoiceOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - logits: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @@ -776,8 +776,8 @@ class XLNetForQuestionAnsweringSimpleOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - start_logits: torch.FloatTensor = None - end_logits: torch.FloatTensor = None + start_logits: Optional[torch.FloatTensor] = None + end_logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/models/xlnet/tokenization_xlnet.py b/src/transformers/models/xlnet/tokenization_xlnet.py index ab40980211..640558e1d8 100644 --- a/src/transformers/models/xlnet/tokenization_xlnet.py +++ b/src/transformers/models/xlnet/tokenization_xlnet.py @@ -240,7 +240,7 @@ class XLNetTokenizer(PreTrainedTokenizer): self, token_ids: List[int], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, spaces_between_special_tokens: bool = True, **kwargs, ) -> str: diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 5a94d6b41d..3cff6950dd 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -862,7 +862,7 @@ class YolosImageProcessor(BaseImageProcessor): image: np.ndarray, target: Dict, format: Optional[AnnotationFormat] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> Dict: @@ -1177,7 +1177,7 @@ class YolosImageProcessor(BaseImageProcessor): self, images: ImageInput, annotations: Optional[Union[AnnotationType, List[AnnotationType]]] = None, - return_segmentation_masks: bool = None, + return_segmentation_masks: Optional[bool] = None, masks_path: Optional[Union[str, pathlib.Path]] = None, do_resize: Optional[bool] = None, size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/yolos/modeling_yolos.py b/src/transformers/models/yolos/modeling_yolos.py index 06edd9b4e5..42bebb37f7 100755 --- a/src/transformers/models/yolos/modeling_yolos.py +++ b/src/transformers/models/yolos/modeling_yolos.py @@ -84,8 +84,8 @@ class YolosObjectDetectionOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None loss_dict: Optional[Dict] = None - logits: torch.FloatTensor = None - pred_boxes: torch.FloatTensor = None + logits: Optional[torch.FloatTensor] = None + pred_boxes: Optional[torch.FloatTensor] = None auxiliary_outputs: Optional[List[Dict]] = None last_hidden_state: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None diff --git a/src/transformers/models/zamba/modeling_zamba.py b/src/transformers/models/zamba/modeling_zamba.py index 7033102eee..3018d3aaf6 100644 --- a/src/transformers/models/zamba/modeling_zamba.py +++ b/src/transformers/models/zamba/modeling_zamba.py @@ -1033,7 +1033,7 @@ class ZambaModel(ZambaPreTrainedModel): @add_start_docstrings_to_model_forward(ZAMBA_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[ZambaHybridDynamicCache] = None, @@ -1212,7 +1212,7 @@ class ZambaForCausalLM(ZambaPreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[ZambaHybridDynamicCache] = None, diff --git a/src/transformers/models/zamba2/modeling_zamba2.py b/src/transformers/models/zamba2/modeling_zamba2.py index 2c60cc2276..7f4a17498a 100644 --- a/src/transformers/models/zamba2/modeling_zamba2.py +++ b/src/transformers/models/zamba2/modeling_zamba2.py @@ -1414,7 +1414,7 @@ class Zamba2Model(Zamba2PreTrainedModel): @add_start_docstrings_to_model_forward(ZAMBA2_INPUTS_DOCSTRING) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Zamba2HybridDynamicCache] = None, @@ -1651,7 +1651,7 @@ class Zamba2ForCausalLM(Zamba2PreTrainedModel, GenerationMixin): @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Zamba2HybridDynamicCache] = None, diff --git a/src/transformers/models/zamba2/modular_zamba2.py b/src/transformers/models/zamba2/modular_zamba2.py index 625cdb0bf5..ece5fb1065 100644 --- a/src/transformers/models/zamba2/modular_zamba2.py +++ b/src/transformers/models/zamba2/modular_zamba2.py @@ -1041,7 +1041,7 @@ class Zamba2Model(ZambaModel, Zamba2PreTrainedModel): def forward( self, - input_ids: torch.LongTensor = None, + input_ids: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[Zamba2HybridDynamicCache] = None, diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth.py b/src/transformers/models/zoedepth/image_processing_zoedepth.py index c93e29c583..d40b938403 100644 --- a/src/transformers/models/zoedepth/image_processing_zoedepth.py +++ b/src/transformers/models/zoedepth/image_processing_zoedepth.py @@ -298,15 +298,15 @@ class ZoeDepthImageProcessor(BaseImageProcessor): def preprocess( self, images: ImageInput, - do_pad: bool = None, - do_rescale: bool = None, - rescale_factor: float = None, - do_normalize: bool = None, + do_pad: Optional[bool] = None, + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_resize: bool = None, + do_resize: Optional[bool] = None, size: Optional[int] = None, - keep_aspect_ratio: bool = None, + keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, resample: PILImageResampling = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/zoedepth/modeling_zoedepth.py b/src/transformers/models/zoedepth/modeling_zoedepth.py index 57cad7328f..5f2e1a8656 100644 --- a/src/transformers/models/zoedepth/modeling_zoedepth.py +++ b/src/transformers/models/zoedepth/modeling_zoedepth.py @@ -69,8 +69,8 @@ class ZoeDepthDepthEstimatorOutput(ModelOutput): """ loss: Optional[torch.FloatTensor] = None - predicted_depth: torch.FloatTensor = None - domain_logits: torch.FloatTensor = None + predicted_depth: Optional[torch.FloatTensor] = None + domain_logits: Optional[torch.FloatTensor] = None hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None attentions: Optional[Tuple[torch.FloatTensor, ...]] = None diff --git a/src/transformers/optimization.py b/src/transformers/optimization.py index 2e6ac0161c..ba9d9920c1 100644 --- a/src/transformers/optimization.py +++ b/src/transformers/optimization.py @@ -339,8 +339,8 @@ def get_cosine_with_min_lr_schedule_with_warmup( num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1, - min_lr: float = None, - min_lr_rate: float = None, + min_lr: Optional[float] = None, + min_lr_rate: Optional[float] = None, ): """ Create a schedule with a learning rate that decreases following the values of the cosine function between the diff --git a/src/transformers/tf_utils.py b/src/transformers/tf_utils.py index cf73731577..c3770cb123 100644 --- a/src/transformers/tf_utils.py +++ b/src/transformers/tf_utils.py @@ -105,7 +105,7 @@ def functional_layernorm(inputs, weight, bias, epsilon=1e-5, axis=-1): def scaled_dot_product_attention( - query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale: float = None + query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale: Optional[float] = None ): """TF equivalent for torch's nn.functional.scaled_dot_product_attention""" if dropout_p != 0.0: diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index dce4c581bf..51382d776f 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -1081,7 +1081,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase): self, token_ids: Union[int, list[int]], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, spaces_between_special_tokens: bool = True, **kwargs, ) -> str: diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 6a88acf711..cb1169fe02 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3024,7 +3024,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = None, + truncation: Union[bool, str, TruncationStrategy, None] = None, max_length: Optional[int] = None, stride: int = 0, is_split_into_words: bool = False, @@ -3453,7 +3453,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): pair_ids: Optional[List[int]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = None, + truncation: Union[bool, str, TruncationStrategy, None] = None, max_length: Optional[int] = None, stride: int = 0, pad_to_multiple_of: Optional[int] = None, diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 75819a1038..54605dbb02 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -658,7 +658,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase): self, token_ids: Union[int, list[int]], skip_special_tokens: bool = False, - clean_up_tokenization_spaces: bool = None, + clean_up_tokenization_spaces: Optional[bool] = None, **kwargs, ) -> str: self._decode_use_source_tokenizer = kwargs.pop("use_source_tokenizer", False) diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 5a618f901c..61fb91e5a8 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -1864,7 +1864,7 @@ class _LazyModule(ModuleType): name: str, module_file: str, import_structure: IMPORT_STRUCTURE_T, - module_spec: importlib.machinery.ModuleSpec = None, + module_spec: Optional[importlib.machinery.ModuleSpec] = None, extra_objects: Dict[str, object] = None, ): super().__init__(name)