diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py index 41855eaed6..c491b01af9 100755 --- a/examples/legacy/seq2seq/run_distributed_eval.py +++ b/examples/legacy/seq2seq/run_distributed_eval.py @@ -19,6 +19,7 @@ import time from json import JSONDecodeError from logging import getLogger from pathlib import Path +from typing import Optional import torch from torch.utils.data import DataLoader @@ -54,7 +55,7 @@ def eval_data_dir( task="summarization", local_rank=None, num_return_sequences=1, - dataset_kwargs: dict = None, + dataset_kwargs: Optional[dict] = None, prefix="", **generate_kwargs, ) -> dict: diff --git a/examples/modular-transformers/image_processing_new_imgproc_model.py b/examples/modular-transformers/image_processing_new_imgproc_model.py index 8320a25228..94274bb8f2 100644 --- a/examples/modular-transformers/image_processing_new_imgproc_model.py +++ b/examples/modular-transformers/image_processing_new_imgproc_model.py @@ -74,7 +74,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: dict[str, int] = None, + size: Optional[dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -159,7 +159,7 @@ class ImgprocModelImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, list[float]]] = None, image_std: Optional[Union[float, list[float]]] = None, return_tensors: Optional[Union[str, TensorType]] = None, - do_convert_rgb: bool = None, + do_convert_rgb: Optional[bool] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> PIL.Image.Image: diff --git a/src/transformers/cache_utils.py b/src/transformers/cache_utils.py index cb2c8c5cdc..7b6ba5ee3f 100644 --- a/src/transformers/cache_utils.py +++ b/src/transformers/cache_utils.py @@ -359,7 +359,7 @@ class DynamicCache(Cache): ``` """ - def __init__(self, _distributed_cache_data: Iterable = None) -> None: + def __init__(self, _distributed_cache_data: Optional[Iterable] = None) -> None: super().__init__() self._seen_tokens = 0 # Used in `generate` to keep tally of how many tokens the cache has seen self.key_cache: List[torch.Tensor] = [] diff --git a/src/transformers/commands/add_new_model_like.py b/src/transformers/commands/add_new_model_like.py index bfb812340e..a16a02c462 100644 --- a/src/transformers/commands/add_new_model_like.py +++ b/src/transformers/commands/add_new_model_like.py @@ -512,7 +512,7 @@ def duplicate_module( new_model_patterns: ModelPatterns, dest_file: Optional[str] = None, add_copied_from: bool = True, - attrs_to_remove: List[str] = None, + attrs_to_remove: Optional[List[str]] = None, ): """ Create a new module from an existing one and adapting all function and classes names from old patterns to new ones. diff --git a/src/transformers/convert_slow_tokenizer.py b/src/transformers/convert_slow_tokenizer.py index c8cc1cdbe9..5716ee4bf5 100644 --- a/src/transformers/convert_slow_tokenizer.py +++ b/src/transformers/convert_slow_tokenizer.py @@ -19,6 +19,7 @@ allow to make our dependency on SentencePiece optional. """ import warnings +from typing import Optional from packaging import version from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors @@ -326,7 +327,9 @@ class OpenAIGPTConverter(Converter): class GPT2Converter(Converter): - def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer: + def converted( + self, vocab: Optional[dict[str, int]] = None, merges: Optional[list[tuple[str, str]]] = None + ) -> Tokenizer: if not vocab: vocab = self.original_tokenizer.encoder if not merges: @@ -395,7 +398,9 @@ class HerbertConverter(Converter): class Qwen2Converter(Converter): - def converted(self, vocab: dict[str, int] = None, merges: list[tuple[str, str]] = None) -> Tokenizer: + def converted( + self, vocab: Optional[dict[str, int]] = None, merges: Optional[list[tuple[str, str]]] = None + ) -> Tokenizer: if not vocab: vocab = self.original_tokenizer.encoder if not merges: diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py index dd08be2941..b3acbb3feb 100644 --- a/src/transformers/image_processing_utils.py +++ b/src/transformers/image_processing_utils.py @@ -209,7 +209,7 @@ def convert_to_size_dict( def get_size_dict( - size: Union[int, Iterable[int], dict[str, int]] = None, + size: Optional[Union[int, Iterable[int], dict[str, int]]] = None, max_size: Optional[int] = None, height_width_order: bool = True, default_to_square: bool = True, diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index 644bb76334..2f590bce0e 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -755,7 +755,7 @@ class BaseImageProcessorFast(BaseImageProcessor): class SemanticSegmentationMixin: - def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[list[tuple]] = None): """ Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/integrations/peft.py b/src/transformers/integrations/peft.py index 0c9402abe6..8a1652748f 100644 --- a/src/transformers/integrations/peft.py +++ b/src/transformers/integrations/peft.py @@ -79,7 +79,7 @@ class PeftAdapterMixin: max_memory: Optional[str] = None, offload_folder: Optional[str] = None, offload_index: Optional[int] = None, - peft_config: Dict[str, Any] = None, + peft_config: Optional[Dict[str, Any]] = None, adapter_state_dict: Optional[Dict[str, "torch.Tensor"]] = None, low_cpu_mem_usage: bool = False, is_trainable: bool = False, diff --git a/src/transformers/models/albert/modeling_flax_albert.py b/src/transformers/models/albert/modeling_flax_albert.py index df2ebddc7e..f7e5f22207 100644 --- a/src/transformers/models/albert/modeling_flax_albert.py +++ b/src/transformers/models/albert/modeling_flax_albert.py @@ -558,7 +558,7 @@ class FlaxAlbertPreTrainedModel(FlaxPreTrainedModel): attention_mask=None, token_type_ids=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/aria/configuration_aria.py b/src/transformers/models/aria/configuration_aria.py index f3faa60ca3..5843e726d6 100644 --- a/src/transformers/models/aria/configuration_aria.py +++ b/src/transformers/models/aria/configuration_aria.py @@ -18,7 +18,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...modeling_rope_utils import rope_config_validation @@ -268,7 +268,7 @@ class AriaConfig(PretrainedConfig): vision_config=None, vision_feature_layer: int = -1, text_config: AriaTextConfig = None, - projector_patch_to_query_dict: Dict = None, + projector_patch_to_query_dict: Optional[Dict] = None, image_token_index: int = 9, initializer_range: float = 0.02, **kwargs, diff --git a/src/transformers/models/aria/image_processing_aria.py b/src/transformers/models/aria/image_processing_aria.py index 364f8f70df..d1a722e905 100644 --- a/src/transformers/models/aria/image_processing_aria.py +++ b/src/transformers/models/aria/image_processing_aria.py @@ -124,8 +124,8 @@ class AriaImageProcessor(BaseImageProcessor): def __init__( self, - image_mean: List[float] = None, - image_std: List[float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, max_image_size: int = 980, min_image_size: int = 336, split_resolutions: Optional[List[Tuple[int, int]]] = None, diff --git a/src/transformers/models/aria/modular_aria.py b/src/transformers/models/aria/modular_aria.py index add5bdc16b..51e203b07b 100644 --- a/src/transformers/models/aria/modular_aria.py +++ b/src/transformers/models/aria/modular_aria.py @@ -276,7 +276,7 @@ class AriaConfig(PretrainedConfig): vision_config=None, vision_feature_layer: int = -1, text_config: AriaTextConfig = None, - projector_patch_to_query_dict: Dict = None, + projector_patch_to_query_dict: Optional[Dict] = None, image_token_index: int = 9, initializer_range: float = 0.02, **kwargs, @@ -514,8 +514,8 @@ class AriaImageProcessor(BaseImageProcessor): def __init__( self, - image_mean: List[float] = None, - image_std: List[float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, max_image_size: int = 980, min_image_size: int = 336, split_resolutions: Optional[List[Tuple[int, int]]] = None, diff --git a/src/transformers/models/bark/configuration_bark.py b/src/transformers/models/bark/configuration_bark.py index 932bad618a..e8e304d218 100644 --- a/src/transformers/models/bark/configuration_bark.py +++ b/src/transformers/models/bark/configuration_bark.py @@ -14,7 +14,7 @@ # limitations under the License. """BARK model configuration""" -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import add_start_docstrings, logging @@ -243,10 +243,10 @@ class BarkConfig(PretrainedConfig): def __init__( self, - semantic_config: Dict = None, - coarse_acoustics_config: Dict = None, - fine_acoustics_config: Dict = None, - codec_config: Dict = None, + semantic_config: Optional[Dict] = None, + coarse_acoustics_config: Optional[Dict] = None, + fine_acoustics_config: Optional[Dict] = None, + codec_config: Optional[Dict] = None, initializer_range=0.02, **kwargs, ): diff --git a/src/transformers/models/bark/generation_configuration_bark.py b/src/transformers/models/bark/generation_configuration_bark.py index 00ff22c8b8..bb1fc26655 100644 --- a/src/transformers/models/bark/generation_configuration_bark.py +++ b/src/transformers/models/bark/generation_configuration_bark.py @@ -15,7 +15,7 @@ """BARK model generation configuration""" import copy -from typing import Dict +from typing import Dict, Optional from ...generation.configuration_utils import GenerationConfig from ...utils import logging @@ -245,9 +245,9 @@ class BarkGenerationConfig(GenerationConfig): def __init__( self, - semantic_config: Dict = None, - coarse_acoustics_config: Dict = None, - fine_acoustics_config: Dict = None, + semantic_config: Optional[Dict] = None, + coarse_acoustics_config: Optional[Dict] = None, + fine_acoustics_config: Optional[Dict] = None, sample_rate=24_000, codebook_size=1024, **kwargs, diff --git a/src/transformers/models/bart/modeling_flax_bart.py b/src/transformers/models/bart/modeling_flax_bart.py index 18c8f6b85c..f04ab551e3 100644 --- a/src/transformers/models/bart/modeling_flax_bart.py +++ b/src/transformers/models/bart/modeling_flax_bart.py @@ -1007,7 +1007,7 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1068,12 +1068,12 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1186,7 +1186,7 @@ class FlaxBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1335,12 +1335,12 @@ class FlaxBartForConditionalGeneration(FlaxBartPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1807,8 +1807,8 @@ class FlaxBartDecoderPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index eb2950f0e2..a83cf10aad 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -106,10 +106,10 @@ class BeitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -194,10 +194,10 @@ class BeitImageProcessor(BaseImageProcessor): image: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -226,10 +226,10 @@ class BeitImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -271,10 +271,10 @@ class BeitImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_reduce_labels: Optional[bool] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -320,10 +320,10 @@ class BeitImageProcessor(BaseImageProcessor): images: ImageInput, segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -470,7 +470,7 @@ class BeitImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`BeitForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py index d37eedea3f..b51ff9fd09 100644 --- a/src/transformers/models/beit/modeling_flax_beit.py +++ b/src/transformers/models/beit/modeling_flax_beit.py @@ -634,7 +634,7 @@ class FlaxBeitPreTrainedModel(FlaxPreTrainedModel): self, pixel_values, bool_masked_pos=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/bert/modeling_flax_bert.py b/src/transformers/models/bert/modeling_flax_bert.py index 61939a53f4..48b72193fa 100644 --- a/src/transformers/models/bert/modeling_flax_bert.py +++ b/src/transformers/models/bert/modeling_flax_bert.py @@ -864,13 +864,13 @@ class FlaxBertPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/big_bird/modeling_flax_big_bird.py b/src/transformers/models/big_bird/modeling_flax_big_bird.py index e3bdfc38da..18913e930d 100644 --- a/src/transformers/models/big_bird/modeling_flax_big_bird.py +++ b/src/transformers/models/big_bird/modeling_flax_big_bird.py @@ -1725,14 +1725,14 @@ class FlaxBigBirdPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: Optional[jax.random.PRNGKey] = None, indices_rng: Optional[jax.random.PRNGKey] = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -2442,7 +2442,7 @@ class FlaxBigBirdForQuestionAnswering(FlaxBigBirdPreTrainedModel): position_ids=None, head_mask=None, question_lengths=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: Optional[jax.random.PRNGKey] = None, indices_rng: Optional[jax.random.PRNGKey] = None, train: bool = False, diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index 2b1f307a29..aa2eb37955 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -92,10 +92,10 @@ class BitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -177,7 +177,7 @@ class BitImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py index 1e0775cd08..835cb6814a 100644 --- a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py @@ -980,7 +980,7 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1043,12 +1043,12 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1161,7 +1161,7 @@ class FlaxBlenderbotPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1311,12 +1311,12 @@ class FlaxBlenderbotForConditionalGeneration(FlaxBlenderbotPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py index 6aceaa611c..1e6a3a727a 100644 --- a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py @@ -977,7 +977,7 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1040,12 +1040,12 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1157,7 +1157,7 @@ class FlaxBlenderbotSmallPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1308,12 +1308,12 @@ class FlaxBlenderbotSmallForConditionalGeneration(FlaxBlenderbotSmallPreTrainedM encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, deterministic: bool = True, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index 9f28b33a66..ace61142ec 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -83,7 +83,7 @@ class BlipImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/bloom/configuration_bloom.py b/src/transformers/models/bloom/configuration_bloom.py index ca10c7ce7e..fb6fc00b94 100644 --- a/src/transformers/models/bloom/configuration_bloom.py +++ b/src/transformers/models/bloom/configuration_bloom.py @@ -148,7 +148,7 @@ class BloomOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/bloom/modeling_flax_bloom.py b/src/transformers/models/bloom/modeling_flax_bloom.py index 51ccb4c362..d0b2f084d3 100644 --- a/src/transformers/models/bloom/modeling_flax_bloom.py +++ b/src/transformers/models/bloom/modeling_flax_bloom.py @@ -463,8 +463,8 @@ class FlaxBloomPreTrainedModel(FlaxPreTrainedModel): self, input_ids, attention_mask=None, - past_key_values: dict = None, - params: dict = None, + past_key_values: Optional[dict] = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 95eaa9f88b..1f651eba1d 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -172,7 +172,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, @@ -181,7 +181,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_pad: bool = True, **kwargs, ) -> None: @@ -385,7 +385,7 @@ class BridgeTowerImageProcessor(BaseImageProcessor): image_std: Optional[Union[float, List[float]]] = None, do_pad: Optional[bool] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/camembert/modeling_camembert.py b/src/transformers/models/camembert/modeling_camembert.py index b69590ae21..f1ab156503 100644 --- a/src/transformers/models/camembert/modeling_camembert.py +++ b/src/transformers/models/camembert/modeling_camembert.py @@ -1581,7 +1581,7 @@ class CamembertForCausalLM(CamembertPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/chameleon/configuration_chameleon.py b/src/transformers/models/chameleon/configuration_chameleon.py index 2cc9cdb29d..5955ef4894 100644 --- a/src/transformers/models/chameleon/configuration_chameleon.py +++ b/src/transformers/models/chameleon/configuration_chameleon.py @@ -14,7 +14,7 @@ # limitations under the License. """chameleon model configuration""" -from typing import List +from typing import List, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -75,7 +75,7 @@ class ChameleonVQVAEConfig(PretrainedConfig): base_channels: int = 128, channel_multiplier: List[int] = [1, 1, 2, 2, 4], num_res_blocks: int = 2, - attn_resolutions: List[int] = None, + attn_resolutions: Optional[List[int]] = None, dropout: float = 0.0, attn_type: str = "vanilla", initializer_range=0.02, diff --git a/src/transformers/models/chameleon/image_processing_chameleon.py b/src/transformers/models/chameleon/image_processing_chameleon.py index 2d1417a8ee..e694cee7bb 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon.py +++ b/src/transformers/models/chameleon/image_processing_chameleon.py @@ -88,10 +88,10 @@ class ChameleonImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.LANCZOS, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 0.0078, do_normalize: bool = True, @@ -173,7 +173,7 @@ class ChameleonImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index d14d286b57..e8f8ba1e8d 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -96,10 +96,10 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -170,7 +170,7 @@ class ChineseCLIPImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index 77215ad636..a506da423d 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -95,10 +95,10 @@ class CLIPImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -203,7 +203,7 @@ class CLIPImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/clip/modeling_flax_clip.py b/src/transformers/models/clip/modeling_flax_clip.py index c674d35e3d..c8eb6cf02e 100644 --- a/src/transformers/models/clip/modeling_flax_clip.py +++ b/src/transformers/models/clip/modeling_flax_clip.py @@ -667,7 +667,7 @@ class FlaxCLIPTextPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -745,7 +745,7 @@ class FlaxCLIPVisionPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -823,7 +823,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): pixel_values, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -867,7 +867,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False, ): @@ -930,7 +930,7 @@ class FlaxCLIPPreTrainedModel(FlaxPreTrainedModel): ) def get_image_features( - self, pixel_values, params: dict = None, dropout_rng: jax.random.PRNGKey = None, train=False + self, pixel_values, params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False ): r""" Args: diff --git a/src/transformers/models/codegen/configuration_codegen.py b/src/transformers/models/codegen/configuration_codegen.py index 6de483cb79..7ed03ab3f6 100644 --- a/src/transformers/models/codegen/configuration_codegen.py +++ b/src/transformers/models/codegen/configuration_codegen.py @@ -151,7 +151,7 @@ class CodeGenOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index 3c256e4f70..83dc0f2c8b 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -749,7 +749,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -863,13 +863,13 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, @@ -1633,7 +1633,7 @@ class ConditionalDetrImageProcessor(BaseImageProcessor): return results # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation with Detr->ConditionalDetr - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index efa1d9476e..0566eb3947 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -850,7 +850,7 @@ class ConditionalDetrImageProcessorFast(BaseImageProcessorFast): return results - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`ConditionalDetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 2f7e445241..5093c9d33b 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -91,7 +91,7 @@ class ConvNextImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[float] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, @@ -190,7 +190,7 @@ class ConvNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[float] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, diff --git a/src/transformers/models/cpmant/tokenization_cpmant.py b/src/transformers/models/cpmant/tokenization_cpmant.py index 2da1d6286c..e5cc353cc5 100644 --- a/src/transformers/models/cpmant/tokenization_cpmant.py +++ b/src/transformers/models/cpmant/tokenization_cpmant.py @@ -222,7 +222,9 @@ class CpmAntTokenizer(PreTrainedTokenizer): index += 1 return (vocab_file,) - def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1: List[int] = None) -> List[int]: + def build_inputs_with_special_tokens( + self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None + ) -> List[int]: """ Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and adding special tokens. A CPMAnt sequence has the following format: diff --git a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py index 32746a38dd..3d8cf3e279 100644 --- a/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py +++ b/src/transformers/models/dab_detr/convert_dab_detr_original_pytorch_checkpoint_to_pytorch.py @@ -19,6 +19,7 @@ import gc import json import re from pathlib import Path +from typing import Optional import torch from huggingface_hub import hf_hub_download @@ -87,7 +88,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # Copied from transformers.models.mllama.convert_mllama_weights_to_hf.convert_old_keys_to_new_keys -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/dbrx/configuration_dbrx.py b/src/transformers/models/dbrx/configuration_dbrx.py index 72df1fe335..36d48380b8 100644 --- a/src/transformers/models/dbrx/configuration_dbrx.py +++ b/src/transformers/models/dbrx/configuration_dbrx.py @@ -89,7 +89,7 @@ class DbrxFFNConfig(PretrainedConfig): def __init__( self, - ffn_act_fn: dict = None, + ffn_act_fn: Optional[dict] = None, ffn_hidden_size: int = 3584, moe_num_experts: int = 4, moe_top_k: int = 1, diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index f7ad8a1499..81cc3b8f33 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -747,7 +747,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -861,13 +861,13 @@ class DeformableDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py index b05622be06..7b198f5200 100644 --- a/src/transformers/models/deit/image_processing_deit.py +++ b/src/transformers/models/deit/image_processing_deit.py @@ -84,10 +84,10 @@ class DeiTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -166,10 +166,10 @@ class DeiTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/deta/image_processing_deta.py b/src/transformers/models/deprecated/deta/image_processing_deta.py index c63be13827..e76228fb6b 100644 --- a/src/transformers/models/deprecated/deta/image_processing_deta.py +++ b/src/transformers/models/deprecated/deta/image_processing_deta.py @@ -553,13 +553,13 @@ class DetaImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: bool = True, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py index 74d16a048d..83a78ac65f 100644 --- a/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py @@ -91,7 +91,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor): do_center_crop: bool = True, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, @@ -179,7 +179,7 @@ class EfficientFormerImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/deprecated/mega/modeling_mega.py b/src/transformers/models/deprecated/mega/modeling_mega.py index 85d1015610..2c4a848df5 100644 --- a/src/transformers/models/deprecated/mega/modeling_mega.py +++ b/src/transformers/models/deprecated/mega/modeling_mega.py @@ -1684,7 +1684,7 @@ class MegaForCausalLM(MegaPreTrainedModel): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/tapex/tokenization_tapex.py b/src/transformers/models/deprecated/tapex/tokenization_tapex.py index 3d554872c4..719d3c59f3 100644 --- a/src/transformers/models/deprecated/tapex/tokenization_tapex.py +++ b/src/transformers/models/deprecated/tapex/tokenization_tapex.py @@ -497,7 +497,7 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]] = None, query: Optional[Union[TextInput, List[TextInput]]] = None, - answer: Union[str, List[str]] = None, + answer: Optional[Union[str, List[str]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, @@ -574,7 +574,7 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]], query: Optional[Union[TextInput, List[TextInput]]] = None, - answer: Union[str, List[str]] = None, + answer: Optional[Union[str, List[str]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, truncation: Union[bool, str, TruncationStrategy] = None, @@ -662,10 +662,10 @@ class TapexTokenizer(PreTrainedTokenizer): self, table: Union["pd.DataFrame", List["pd.DataFrame"]], query: Optional[List[TextInput]] = None, - answer: List[str] = None, + answer: Optional[List[str]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -884,7 +884,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: Optional[str] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1053,7 +1053,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: List[str], add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, @@ -1197,7 +1197,7 @@ class TapexTokenizer(PreTrainedTokenizer): answer: str, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str] = None, + truncation: Optional[Union[bool, str]] = None, max_length: Optional[int] = None, pad_to_multiple_of: Optional[int] = None, return_tensors: Optional[Union[str, TensorType]] = None, diff --git a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py index 02d78c9340..a10b9b3b21 100644 --- a/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/deprecated/tvlt/image_processing_tvlt.py @@ -121,12 +121,12 @@ class TvltImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, patch_size: List[int] = [16, 16], num_frames: int = 8, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -221,10 +221,10 @@ class TvltImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -278,12 +278,12 @@ class TvltImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - patch_size: List[int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[List[int]] = None, num_frames: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py index 7241087893..9f644bfc56 100644 --- a/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py @@ -93,10 +93,10 @@ class ViTHybridImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -193,7 +193,7 @@ class ViTHybridImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py b/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py index b24c6a5174..655bbdc023 100644 --- a/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py +++ b/src/transformers/models/depth_pro/convert_depth_pro_weights_to_hf.py @@ -15,6 +15,7 @@ import argparse import gc import os +from typing import Optional import regex as re import torch @@ -93,7 +94,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): output_dict = {} if state_dict_keys is not None: old_text = "\n".join(state_dict_keys) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 75d7e74add..0b365eafa1 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -732,7 +732,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -845,13 +845,13 @@ class DetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, @@ -1824,7 +1824,7 @@ class DetrImageProcessor(BaseImageProcessor): return results - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`DetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index dc14ec61f0..419d099e91 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -1088,7 +1088,7 @@ class DetrImageProcessorFast(BaseImageProcessorFast): return results # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.post_process_semantic_segmentation - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple[int, int]] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None): """ Converts the output of [`DetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/dinov2/modeling_flax_dinov2.py b/src/transformers/models/dinov2/modeling_flax_dinov2.py index 2766850e92..48afecde5e 100644 --- a/src/transformers/models/dinov2/modeling_flax_dinov2.py +++ b/src/transformers/models/dinov2/modeling_flax_dinov2.py @@ -592,7 +592,7 @@ class FlaxDinov2PreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/distilbert/modeling_flax_distilbert.py b/src/transformers/models/distilbert/modeling_flax_distilbert.py index 1f2b6ac96a..e9c12c4b08 100644 --- a/src/transformers/models/distilbert/modeling_flax_distilbert.py +++ b/src/transformers/models/distilbert/modeling_flax_distilbert.py @@ -459,7 +459,7 @@ class FlaxDistilBertPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, head_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index 667c7ab3f6..c45e11430f 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -94,7 +94,7 @@ class DonutImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_thumbnail: bool = True, do_align_long_axis: bool = False, @@ -313,7 +313,7 @@ class DonutImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 9a35ee4b4a..a22548f5cd 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -154,7 +154,7 @@ class DPTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, keep_aspect_ratio: bool = False, ensure_multiple_of: int = 1, @@ -299,7 +299,7 @@ class DPTImageProcessor(BaseImageProcessor): image: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -340,7 +340,7 @@ class DPTImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -391,7 +391,7 @@ class DPTImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, keep_aspect_ratio: Optional[bool] = None, ensure_multiple_of: Optional[int] = None, @@ -592,7 +592,7 @@ class DPTImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->DPT - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`DPTForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 612ede7086..6aa42f18ce 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -87,10 +87,10 @@ class EfficientNetImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PIL.Image.NEAREST, do_center_crop: bool = False, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, rescale_offset: bool = False, do_rescale: bool = True, @@ -213,10 +213,10 @@ class EfficientNetImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, rescale_offset: Optional[bool] = None, diff --git a/src/transformers/models/electra/modeling_flax_electra.py b/src/transformers/models/electra/modeling_flax_electra.py index 4bf75ff33e..7cc20ec27f 100644 --- a/src/transformers/models/electra/modeling_flax_electra.py +++ b/src/transformers/models/electra/modeling_flax_electra.py @@ -777,13 +777,13 @@ class FlaxElectraPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/emu3/configuration_emu3.py b/src/transformers/models/emu3/configuration_emu3.py index 5b5abedf40..60e5e55ab4 100644 --- a/src/transformers/models/emu3/configuration_emu3.py +++ b/src/transformers/models/emu3/configuration_emu3.py @@ -304,7 +304,7 @@ class Emu3Config(PretrainedConfig): self, vq_config: Union[Dict, Emu3VQVAEConfig] = None, text_config: Union[Dict, Emu3TextConfig] = None, - vocabulary_map: Dict[int, int] = None, + vocabulary_map: Optional[Dict[int, int]] = None, **kwargs, ): if vq_config is None: diff --git a/src/transformers/models/emu3/image_processing_emu3.py b/src/transformers/models/emu3/image_processing_emu3.py index a63269c99e..3780de93c3 100644 --- a/src/transformers/models/emu3/image_processing_emu3.py +++ b/src/transformers/models/emu3/image_processing_emu3.py @@ -309,7 +309,7 @@ class Emu3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 415fd058e4..1ed7a2a5ce 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -550,7 +550,7 @@ class EncoderDecoderModel(PreTrainedModel, GenerationMixin): decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index ccb0aa0a6d..c37e7d3537 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -436,7 +436,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -508,12 +508,12 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -638,7 +638,7 @@ class FlaxEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py index 2c89feea43..5bb3f150f6 100644 --- a/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +++ b/src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py @@ -14,7 +14,7 @@ # limitations under the License. """FastSpeech2Conformer model configuration""" -from typing import Dict +from typing import Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -459,8 +459,8 @@ class FastSpeech2ConformerWithHifiGanConfig(PretrainedConfig): def __init__( self, - model_config: Dict = None, - vocoder_config: Dict = None, + model_config: Optional[Dict] = None, + vocoder_config: Optional[Dict] = None, **kwargs, ): if model_config is None: diff --git a/src/transformers/models/flava/configuration_flava.py b/src/transformers/models/flava/configuration_flava.py index 7a18b33ac8..4f9a47b4d1 100644 --- a/src/transformers/models/flava/configuration_flava.py +++ b/src/transformers/models/flava/configuration_flava.py @@ -14,7 +14,7 @@ # limitations under the License. """FLAVA model configurations""" -from typing import Any, Dict +from typing import Any, Dict, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -472,10 +472,10 @@ class FlavaConfig(PretrainedConfig): def __init__( self, - image_config: Dict[str, Any] = None, - text_config: Dict[str, Any] = None, - multimodal_config: Dict[str, Any] = None, - image_codebook_config: Dict[str, Any] = None, + image_config: Optional[Dict[str, Any]] = None, + text_config: Optional[Dict[str, Any]] = None, + multimodal_config: Optional[Dict[str, Any]] = None, + image_codebook_config: Optional[Dict[str, Any]] = None, hidden_size: int = 768, layer_norm_eps: float = 1e-12, projection_dim: int = 768, diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index 2b85a64cb8..caa03dca8c 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -228,10 +228,10 @@ class FlavaImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -392,10 +392,10 @@ class FlavaImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -457,7 +457,7 @@ class FlavaImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index 080ff772e2..2984aac67d 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -537,7 +537,7 @@ class FuyuImageProcessor(BaseImageProcessor): } return FuyuBatchFeature(data=data, tensor_type=return_tensors) - def get_num_patches(self, image_height: int, image_width: int, patch_size: Dict[str, int] = None) -> int: + def get_num_patches(self, image_height: int, image_width: int, patch_size: Optional[Dict[str, int]] = None) -> int: """ Calculate number of patches required to encode an image. diff --git a/src/transformers/models/gemma/modeling_flax_gemma.py b/src/transformers/models/gemma/modeling_flax_gemma.py index 1b8c3671f0..237e92d949 100644 --- a/src/transformers/models/gemma/modeling_flax_gemma.py +++ b/src/transformers/models/gemma/modeling_flax_gemma.py @@ -485,8 +485,8 @@ class FlaxGemmaPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gemma3/image_processing_gemma3.py b/src/transformers/models/gemma3/image_processing_gemma3.py index a138acef38..91e2d0c66a 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3.py +++ b/src/transformers/models/gemma3/image_processing_gemma3.py @@ -95,7 +95,7 @@ class Gemma3ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -241,7 +241,7 @@ class Gemma3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py b/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py index 3df7214410..2bf4b3ac7d 100644 --- a/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py +++ b/src/transformers/models/got_ocr2/convert_got_ocr2_weights_to_hf.py @@ -61,7 +61,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { CONTEXT_LENGTH = 8000 -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py index 875c0742b9..dc06f1ef39 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2.py @@ -172,7 +172,7 @@ class GotOcr2ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_to_patches: bool = False, min_patches: int = 1, max_patches: int = 12, @@ -419,7 +419,7 @@ class GotOcr2ImageProcessor(BaseImageProcessor): min_patches: int, max_patches: int, use_thumbnail: bool = True, - patch_size: Union[Tuple, int, dict] = None, + patch_size: Optional[Union[Tuple, int, dict]] = None, data_format: ChannelDimension = None, ): """ diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py index 8498e37803..e8b17c4ed3 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py @@ -114,7 +114,7 @@ class GotOcr2ImageProcessorFast(BaseImageProcessorFast): min_patches: int, max_patches: int, use_thumbnail: bool = True, - patch_size: Union[Tuple, int, dict] = None, + patch_size: Optional[Union[Tuple, int, dict]] = None, interpolation: Optional["F.InterpolationMode"] = None, ): """ diff --git a/src/transformers/models/gpt2/configuration_gpt2.py b/src/transformers/models/gpt2/configuration_gpt2.py index f3ebea0249..fb582998bf 100644 --- a/src/transformers/models/gpt2/configuration_gpt2.py +++ b/src/transformers/models/gpt2/configuration_gpt2.py @@ -194,7 +194,7 @@ class GPT2OnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/gpt2/modeling_flax_gpt2.py b/src/transformers/models/gpt2/modeling_flax_gpt2.py index b6000aed5d..1382a25561 100644 --- a/src/transformers/models/gpt2/modeling_flax_gpt2.py +++ b/src/transformers/models/gpt2/modeling_flax_gpt2.py @@ -461,8 +461,8 @@ class FlaxGPT2PreTrainedModel(FlaxPreTrainedModel): position_ids=None, encoder_hidden_states: Optional[jnp.ndarray] = None, encoder_attention_mask: Optional[jnp.ndarray] = None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py index 7f74b80001..f7371d08b0 100644 --- a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py @@ -404,8 +404,8 @@ class FlaxGPTNeoPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/gptj/configuration_gptj.py b/src/transformers/models/gptj/configuration_gptj.py index 5e76b3f4ba..5b59d309c2 100644 --- a/src/transformers/models/gptj/configuration_gptj.py +++ b/src/transformers/models/gptj/configuration_gptj.py @@ -140,7 +140,7 @@ class GPTJOnnxConfig(OnnxConfigWithPast): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, use_past: bool = False, ): super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past) diff --git a/src/transformers/models/gptj/modeling_flax_gptj.py b/src/transformers/models/gptj/modeling_flax_gptj.py index 01ec3acd50..ee88f69cc1 100644 --- a/src/transformers/models/gptj/modeling_flax_gptj.py +++ b/src/transformers/models/gptj/modeling_flax_gptj.py @@ -438,8 +438,8 @@ class FlaxGPTJPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py index 03a6c2e4e3..f32d3095ff 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py @@ -756,7 +756,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -899,13 +899,13 @@ class GroundingDinoImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py index 755cfaf5d9..a741539a40 100644 --- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py +++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py @@ -2554,7 +2554,7 @@ class GroundingDinoForObjectDetection(GroundingDinoPreTrainedModel): output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - labels: List[Dict[str, Union[torch.LongTensor, torch.FloatTensor]]] = None, + labels: Optional[List[Dict[str, Union[torch.LongTensor, torch.FloatTensor]]]] = None, ): r""" labels (`List[Dict]` of len `(batch_size,)`, *optional*): diff --git a/src/transformers/models/idefics/image_processing_idefics.py b/src/transformers/models/idefics/image_processing_idefics.py index 768ef893d2..17b7fb4f39 100644 --- a/src/transformers/models/idefics/image_processing_idefics.py +++ b/src/transformers/models/idefics/image_processing_idefics.py @@ -101,7 +101,7 @@ class IdeficsImageProcessor(BaseImageProcessor): image_size: Optional[Dict[str, int]] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - transform: Callable = None, + transform: Optional[Callable] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH, diff --git a/src/transformers/models/idefics2/image_processing_idefics2.py b/src/transformers/models/idefics2/image_processing_idefics2.py index 2a853fc02e..239a266d9b 100644 --- a/src/transformers/models/idefics2/image_processing_idefics2.py +++ b/src/transformers/models/idefics2/image_processing_idefics2.py @@ -190,7 +190,7 @@ class Idefics2ImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, diff --git a/src/transformers/models/idefics3/image_processing_idefics3.py b/src/transformers/models/idefics3/image_processing_idefics3.py index b0677c4708..b2f049e998 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3.py +++ b/src/transformers/models/idefics3/image_processing_idefics3.py @@ -295,10 +295,10 @@ class Idefics3ImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.LANCZOS, do_image_splitting: bool = True, - max_image_size: Dict[str, int] = None, + max_image_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/ijepa/convert_ijepa_to_hf.py b/src/transformers/models/ijepa/convert_ijepa_to_hf.py index 5c15a72ff8..25d97df6ce 100644 --- a/src/transformers/models/ijepa/convert_ijepa_to_hf.py +++ b/src/transformers/models/ijepa/convert_ijepa_to_hf.py @@ -21,6 +21,7 @@ import argparse import gc import re from pathlib import Path +from typing import Optional import requests import torch @@ -63,7 +64,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ Converts old keys to new keys using the mapping and dynamically removes the 'ijepa.' prefix if necessary. diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index af13a2d317..5b941a6c77 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -89,7 +89,7 @@ class ImageGPTImageProcessor(BaseImageProcessor): # clusters is a first argument to maintain backwards compatibility with the old ImageGPTImageProcessor clusters: Optional[Union[List[List[int]], np.ndarray]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_normalize: bool = True, do_color_quantize: bool = True, @@ -180,7 +180,7 @@ class ImageGPTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_normalize: Optional[bool] = None, do_color_quantize: Optional[bool] = None, diff --git a/src/transformers/models/informer/configuration_informer.py b/src/transformers/models/informer/configuration_informer.py index 028f5b3229..5cb34dc897 100644 --- a/src/transformers/models/informer/configuration_informer.py +++ b/src/transformers/models/informer/configuration_informer.py @@ -141,7 +141,7 @@ class InformerConfig(PretrainedConfig): distribution_output: str = "student_t", loss: str = "nll", input_size: int = 1, - lags_sequence: List[int] = None, + lags_sequence: Optional[List[int]] = None, scaling: Optional[Union[str, bool]] = "mean", num_dynamic_real_features: int = 0, num_static_real_features: int = 0, diff --git a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py index 9c55ba60d3..32018a7954 100644 --- a/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py @@ -84,7 +84,7 @@ class InstructBlipVideoImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/layoutlm/configuration_layoutlm.py b/src/transformers/models/layoutlm/configuration_layoutlm.py index aebd25d536..e0f9ef60f6 100644 --- a/src/transformers/models/layoutlm/configuration_layoutlm.py +++ b/src/transformers/models/layoutlm/configuration_layoutlm.py @@ -139,7 +139,7 @@ class LayoutLMOnnxConfig(OnnxConfig): self, config: PretrainedConfig, task: str = "default", - patching_specs: List[PatchingSpec] = None, + patching_specs: Optional[List[PatchingSpec]] = None, ): super().__init__(config, task=task, patching_specs=patching_specs) self.max_2d_positions = config.max_2d_position_embeddings - 1 diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index 5d946982fa..8a73e443de 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -129,7 +129,7 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, apply_ocr: bool = True, ocr_lang: Optional[str] = None, @@ -201,7 +201,7 @@ class LayoutLMv2ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, diff --git a/src/transformers/models/layoutlmv2/processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/processing_layoutlmv2.py index 39d34b3a99..a5ac6681c2 100644 --- a/src/transformers/models/layoutlmv2/processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/processing_layoutlmv2.py @@ -71,7 +71,7 @@ class LayoutLMv2Processor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py index 1fa23c32ae..2466bdc80d 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py @@ -406,7 +406,7 @@ class LayoutLMv2Tokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py index 5d36e9fd27..32d38be09b 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py @@ -157,7 +157,7 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index d322c78d7e..705a5e5123 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -146,13 +146,13 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_value: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, Iterable[float]] = None, - image_std: Union[float, Iterable[float]] = None, + image_mean: Optional[Union[float, Iterable[float]]] = None, + image_std: Optional[Union[float, Iterable[float]]] = None, apply_ocr: bool = True, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = "", @@ -228,13 +228,13 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample=None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, - image_mean: Union[float, Iterable[float]] = None, - image_std: Union[float, Iterable[float]] = None, + image_mean: Optional[Union[float, Iterable[float]]] = None, + image_std: Optional[Union[float, Iterable[float]]] = None, apply_ocr: Optional[bool] = None, ocr_lang: Optional[str] = None, tesseract_config: Optional[str] = None, diff --git a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py index 4bd9955775..209272ca35 100644 --- a/src/transformers/models/layoutlmv3/processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/processing_layoutlmv3.py @@ -71,7 +71,7 @@ class LayoutLMv3Processor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py index b88f7b4c1b..7758ba0acc 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py @@ -535,7 +535,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py index 737a50df9f..3d0cd26d80 100644 --- a/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py @@ -201,7 +201,7 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/processing_layoutxlm.py b/src/transformers/models/layoutxlm/processing_layoutxlm.py index 892a7c2cf1..b325221d9f 100644 --- a/src/transformers/models/layoutxlm/processing_layoutxlm.py +++ b/src/transformers/models/layoutxlm/processing_layoutxlm.py @@ -70,7 +70,7 @@ class LayoutXLMProcessor(ProcessorMixin): images, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py index 8dc459ba94..8aa85b4f31 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm.py @@ -441,7 +441,7 @@ class LayoutXLMTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py index 4c16642c57..66d972f448 100644 --- a/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py +++ b/src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py @@ -269,7 +269,7 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index d980bea555..8b3e4e4cf1 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -90,10 +90,10 @@ class LevitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/llama/modeling_flax_llama.py b/src/transformers/models/llama/modeling_flax_llama.py index 1fed0a36c1..14bc16ede6 100644 --- a/src/transformers/models/llama/modeling_flax_llama.py +++ b/src/transformers/models/llama/modeling_flax_llama.py @@ -467,8 +467,8 @@ class FlaxLlamaPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/llama4/convert_llama4_weights_to_hf.py b/src/transformers/models/llama4/convert_llama4_weights_to_hf.py index 923d9ffc63..bce62169a4 100644 --- a/src/transformers/models/llama4/convert_llama4_weights_to_hf.py +++ b/src/transformers/models/llama4/convert_llama4_weights_to_hf.py @@ -90,7 +90,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { # fmt: on -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/llama4/modeling_llama4.py b/src/transformers/models/llama4/modeling_llama4.py index 0959199c2e..bc56310f18 100644 --- a/src/transformers/models/llama4/modeling_llama4.py +++ b/src/transformers/models/llama4/modeling_llama4.py @@ -1287,7 +1287,7 @@ class Llama4VisionEncoderLayer(nn.Module): hidden_state: torch.Tensor, freqs_ci: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, - output_attentions: bool = None, + output_attentions: Optional[bool] = None, ): # Self Attention residual = hidden_state diff --git a/src/transformers/models/llava/image_processing_llava.py b/src/transformers/models/llava/image_processing_llava.py index 37ef079c91..2940ed5c80 100644 --- a/src/transformers/models/llava/image_processing_llava.py +++ b/src/transformers/models/llava/image_processing_llava.py @@ -99,10 +99,10 @@ class LlavaImageProcessor(BaseImageProcessor): self, do_pad: bool = False, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 63246e8a53..e1afee3192 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -168,11 +168,11 @@ class LlavaNextImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -330,7 +330,7 @@ class LlavaNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, @@ -559,8 +559,8 @@ class LlavaNextImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py index bedba000c7..06ee0fbdae 100644 --- a/src/transformers/models/llava_next_video/image_processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/image_processing_llava_next_video.py @@ -93,11 +93,11 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -180,7 +180,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, @@ -280,7 +280,7 @@ class LlavaNextVideoImageProcessor(BaseImageProcessor): self, images: VideoInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py index 5a9bb5e3ea..8cfdfee1f4 100644 --- a/src/transformers/models/llava_onevision/image_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision.py @@ -162,8 +162,8 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -454,7 +454,7 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -529,8 +529,8 @@ class LlavaOnevisionImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - image_grid_pinpoints: List = None, + size: Optional[Dict[str, int]] = None, + image_grid_pinpoints: Optional[List] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py index 61ef776db8..b9ac7a6e4b 100644 --- a/src/transformers/models/llava_onevision/video_processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/video_processing_llava_onevision.py @@ -84,7 +84,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -112,7 +112,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -203,7 +203,7 @@ class LlavaOnevisionVideoProcessor(BaseImageProcessor): self, videos: VideoInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/longformer/configuration_longformer.py b/src/transformers/models/longformer/configuration_longformer.py index 7a4d698471..16bcba9fbb 100644 --- a/src/transformers/models/longformer/configuration_longformer.py +++ b/src/transformers/models/longformer/configuration_longformer.py @@ -139,7 +139,9 @@ class LongformerConfig(PretrainedConfig): class LongformerOnnxConfig(OnnxConfig): - def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: "List[PatchingSpec]" = None): + def __init__( + self, config: "PretrainedConfig", task: str = "default", patching_specs: "Optional[List[PatchingSpec]]" = None + ): super().__init__(config, task, patching_specs) config.onnx_export = True diff --git a/src/transformers/models/longt5/modeling_flax_longt5.py b/src/transformers/models/longt5/modeling_flax_longt5.py index 1301704414..b9a341349f 100644 --- a/src/transformers/models/longt5/modeling_flax_longt5.py +++ b/src/transformers/models/longt5/modeling_flax_longt5.py @@ -1731,7 +1731,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1816,7 +1816,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1872,12 +1872,12 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -2260,12 +2260,12 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/marian/modeling_flax_marian.py b/src/transformers/models/marian/modeling_flax_marian.py index d4844b6fc3..2436158806 100644 --- a/src/transformers/models/marian/modeling_flax_marian.py +++ b/src/transformers/models/marian/modeling_flax_marian.py @@ -970,7 +970,7 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1032,12 +1032,12 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1150,7 +1150,7 @@ class FlaxMarianPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1299,12 +1299,12 @@ class FlaxMarianMTModel(FlaxMarianPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index fb3a8f7a65..9884b6d7e9 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -1155,7 +1155,7 @@ class TFMarianMainLayer(keras.layers.Layer): decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py index 26ba704150..6d1edf2bbb 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm.py +++ b/src/transformers/models/markuplm/tokenization_markuplm.py @@ -495,7 +495,7 @@ class MarkupLMTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - xpaths: Union[List[List[int]], List[List[List[int]]]] = None, + xpaths: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py index 55d75e3541..e9e9a11953 100644 --- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py +++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py @@ -270,7 +270,7 @@ class MarkupLMTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - xpaths: Union[List[List[int]], List[List[List[int]]]] = None, + xpaths: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, node_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 5c61431bf0..2a3faf1831 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -207,7 +207,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -404,14 +404,14 @@ class Mask2FormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, num_labels: Optional[int] = None, @@ -576,7 +576,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -600,7 +600,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -642,7 +642,7 @@ class Mask2FormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: diff --git a/src/transformers/models/mask2former/modeling_mask2former.py b/src/transformers/models/mask2former/modeling_mask2former.py index 107267bb51..bf0bedd291 100644 --- a/src/transformers/models/mask2former/modeling_mask2former.py +++ b/src/transformers/models/mask2former/modeling_mask2former.py @@ -1804,7 +1804,7 @@ class Mask2FormerMaskedAttentionDecoder(nn.Module): pixel_embeddings: Optional[torch.Tensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None, query_position_embeddings: Optional[torch.Tensor] = None, - feature_size_list: List = None, + feature_size_list: Optional[List] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index f433678019..32fc423f08 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -213,7 +213,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -410,14 +410,14 @@ class MaskFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, num_labels: Optional[int] = None, @@ -579,7 +579,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -603,7 +603,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, @@ -645,7 +645,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 0, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: @@ -973,7 +973,7 @@ class MaskFormerImageProcessor(BaseImageProcessor): return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Optional[Tuple[int, int]] = None ) -> "torch.Tensor": """ Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only diff --git a/src/transformers/models/mbart/modeling_flax_mbart.py b/src/transformers/models/mbart/modeling_flax_mbart.py index 2f1b650a5d..1e019f5199 100644 --- a/src/transformers/models/mbart/modeling_flax_mbart.py +++ b/src/transformers/models/mbart/modeling_flax_mbart.py @@ -1045,7 +1045,7 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1106,12 +1106,12 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1223,7 +1223,7 @@ class FlaxMBartPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1371,12 +1371,12 @@ class FlaxMBartForConditionalGeneration(FlaxMBartPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index dd9bf976a2..16c53caa3f 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -1430,7 +1430,7 @@ class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageMo decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[TFBaseModelOutput] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/mistral/modeling_flax_mistral.py b/src/transformers/models/mistral/modeling_flax_mistral.py index f02446ae3e..c90bf25a95 100644 --- a/src/transformers/models/mistral/modeling_flax_mistral.py +++ b/src/transformers/models/mistral/modeling_flax_mistral.py @@ -461,8 +461,8 @@ class FlaxMistralPreTrainedModel(FlaxPreTrainedModel): input_ids, attention_mask=None, position_ids=None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py index f5f338fd8d..8cb9e78daa 100644 --- a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py +++ b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py @@ -90,7 +90,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { CONTEXT_LENGTH = 131072 -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index c9f96a955e..2342979f0d 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -92,7 +92,7 @@ class MobileNetV1ImageProcessor(BaseImageProcessor): size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -171,10 +171,10 @@ class MobileNetV1ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index ca6aa04c14..e6b909a8e4 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -98,7 +98,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -177,10 +177,10 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -309,7 +309,7 @@ class MobileNetV2ImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->MobileNetV2 - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`MobileNetV2ForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 23ceae679f..c23de20ee5 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -90,12 +90,12 @@ class MobileViTImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: bool = True, **kwargs, ) -> None: @@ -223,12 +223,12 @@ class MobileViTImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: Optional[bool] = None, data_format: Optional[Union[str, ChannelDimension]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -265,9 +265,9 @@ class MobileViTImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" @@ -305,12 +305,12 @@ class MobileViTImageProcessor(BaseImageProcessor): images: ImageInput, segmentation_maps: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_flip_channel_order: Optional[bool] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: ChannelDimension = ChannelDimension.FIRST, @@ -440,7 +440,7 @@ class MobileViTImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->MobileViT - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`MobileViTForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/moshi/modeling_moshi.py b/src/transformers/models/moshi/modeling_moshi.py index 6fa1c0c5e4..a7387004a6 100644 --- a/src/transformers/models/moshi/modeling_moshi.py +++ b/src/transformers/models/moshi/modeling_moshi.py @@ -1067,7 +1067,7 @@ class MoshiDepthDecoder(MoshiPreTrainedModel, GenerationMixin): input_ids: Optional[torch.LongTensor] = None, last_hidden_state: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.BoolTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -1931,7 +1931,7 @@ class MoshiForConditionalGeneration(MoshiPreTrainedModel, GenerationMixin): user_audio_codes: Optional[torch.Tensor] = None, moshi_input_values: Optional[torch.FloatTensor] = None, moshi_audio_codes: Optional[torch.Tensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, text_labels: Optional[torch.LongTensor] = None, audio_labels: Optional[torch.LongTensor] = None, diff --git a/src/transformers/models/musicgen/modeling_musicgen.py b/src/transformers/models/musicgen/modeling_musicgen.py index bea8d9c637..e18c09a11b 100644 --- a/src/transformers/models/musicgen/modeling_musicgen.py +++ b/src/transformers/models/musicgen/modeling_musicgen.py @@ -2018,7 +2018,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin): decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, @@ -2439,7 +2439,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel, GenerationMixin): return torch.ones((batch_size, 1), dtype=torch.long, device=self.device) * bos_token_id def _get_decoder_start_token_id( - self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None + self, decoder_start_token_id: Optional[Union[int, List[int]]] = None, bos_token_id: Optional[int] = None ) -> int: decoder_start_token_id = ( decoder_start_token_id diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index a3cc95690d..2489ec9a38 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -1917,7 +1917,7 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin): input_features: Optional[torch.FloatTensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.BoolTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, inputs_embeds: Optional[torch.FloatTensor] = None, decoder_inputs_embeds: Optional[torch.FloatTensor] = None, @@ -2310,7 +2310,7 @@ class MusicgenMelodyForConditionalGeneration(PreTrainedModel, GenerationMixin): # Copied from transformers.models.musicgen.modeling_musicgen.MusicgenForConditionalGeneration._get_decoder_start_token_id def _get_decoder_start_token_id( - self, decoder_start_token_id: Union[int, List[int]] = None, bos_token_id: Optional[int] = None + self, decoder_start_token_id: Optional[Union[int, List[int]]] = None, bos_token_id: Optional[int] = None ) -> int: decoder_start_token_id = ( decoder_start_token_id diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 25b5c5e7bc..9d38a0afaf 100644 --- a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -96,7 +96,7 @@ class NougatImageProcessor(BaseImageProcessor): self, do_crop_margin: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_thumbnail: bool = True, do_align_long_axis: bool = False, @@ -373,13 +373,13 @@ class NougatImageProcessor(BaseImageProcessor): images: ImageInput, do_crop_margin: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, do_rescale: Optional[bool] = None, - rescale_factor: Union[int, float] = None, + rescale_factor: Optional[Union[int, float]] = None, do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, diff --git a/src/transformers/models/nougat/processing_nougat.py b/src/transformers/models/nougat/processing_nougat.py index ca395e261a..6f48f23d58 100644 --- a/src/transformers/models/nougat/processing_nougat.py +++ b/src/transformers/models/nougat/processing_nougat.py @@ -52,13 +52,13 @@ class NougatProcessor(ProcessorMixin): text=None, do_crop_margin: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: "PILImageResampling" = None, # noqa: F821 do_thumbnail: Optional[bool] = None, do_align_long_axis: Optional[bool] = None, do_pad: Optional[bool] = None, do_rescale: Optional[bool] = None, - rescale_factor: Union[int, float] = None, + rescale_factor: Optional[Union[int, float]] = None, do_normalize: Optional[bool] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, diff --git a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py index 3f5c51779b..618c67c783 100644 --- a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py @@ -227,7 +227,7 @@ class OmDetTurboProcessor(ProcessorMixin): def __call__( self, images: ImageInput = None, - text: Union[List[str], List[List[str]]] = None, + text: Optional[Union[List[str], List[List[str]]]] = None, audio=None, videos=None, **kwargs: Unpack[OmDetTurboProcessorKwargs], diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index 956bd3e7e2..068d6afd21 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -210,7 +210,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -430,13 +430,13 @@ class OneFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, ignore_index: Optional[int] = None, do_reduce_labels: bool = False, repo_path: Optional[str] = "shi-labs/oneformer_demo", @@ -583,7 +583,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -604,7 +604,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -644,7 +644,7 @@ class OneFormerImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" diff --git a/src/transformers/models/opt/modeling_flax_opt.py b/src/transformers/models/opt/modeling_flax_opt.py index fc023bb4ae..97637a83d1 100644 --- a/src/transformers/models/opt/modeling_flax_opt.py +++ b/src/transformers/models/opt/modeling_flax_opt.py @@ -585,8 +585,8 @@ class FlaxOPTPreTrainedModel(FlaxPreTrainedModel): input_ids: jnp.ndarray, attention_mask: Optional[jnp.ndarray] = None, position_ids: Optional[jnp.ndarray] = None, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index bc211d1fb4..b7c7785bc6 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -248,7 +248,7 @@ class Owlv2ImageProcessor(BaseImageProcessor): rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, @@ -371,7 +371,7 @@ class Owlv2ImageProcessor(BaseImageProcessor): images: ImageInput, do_pad: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py index 83f374651a..856553336f 100644 --- a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py @@ -185,10 +185,10 @@ class PatchTSMixerConfig(PretrainedConfig): distribution_output: str = "student_t", # Prediction head configuration prediction_length: int = 16, - prediction_channel_indices: list = None, + prediction_channel_indices: Optional[list] = None, # Classification/Regression configuration num_targets: int = 3, - output_range: list = None, + output_range: Optional[list] = None, head_aggregation: str = "max_pool", **kwargs, ): diff --git a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py index 2238426cd0..2336338089 100644 --- a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py +++ b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py @@ -816,7 +816,7 @@ class PatchTSMixerPretrainHead(nn.Module): def random_masking( inputs: torch.Tensor, mask_ratio: float, - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, channel_consistent_masking: bool = False, mask_value: int = 0, ): @@ -875,7 +875,7 @@ def random_masking( def forecast_masking( inputs: torch.Tensor, num_forecast_mask_patches: Union[list, int], - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, mask_value: int = 0, ): """Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches. diff --git a/src/transformers/models/patchtst/modeling_patchtst.py b/src/transformers/models/patchtst/modeling_patchtst.py index 95897db85f..7ee66bec70 100755 --- a/src/transformers/models/patchtst/modeling_patchtst.py +++ b/src/transformers/models/patchtst/modeling_patchtst.py @@ -218,7 +218,7 @@ class PatchTSTBatchNorm(nn.Module): def random_masking( inputs: torch.Tensor, mask_ratio: float, - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, channel_consistent_masking: bool = False, mask_value: int = 0, ): @@ -276,7 +276,7 @@ def random_masking( def forecast_masking( inputs: torch.Tensor, num_forecast_mask_patches: Union[list, int], - unmasked_channel_indices: list = None, + unmasked_channel_indices: Optional[list] = None, mask_value: int = 0, ): """Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches. diff --git a/src/transformers/models/pegasus/modeling_flax_pegasus.py b/src/transformers/models/pegasus/modeling_flax_pegasus.py index b7c7a8fd55..89b8450312 100644 --- a/src/transformers/models/pegasus/modeling_flax_pegasus.py +++ b/src/transformers/models/pegasus/modeling_flax_pegasus.py @@ -988,7 +988,7 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1049,12 +1049,12 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1167,7 +1167,7 @@ class FlaxPegasusPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1317,12 +1317,12 @@ class FlaxPegasusForConditionalGeneration(FlaxPegasusPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, deterministic: bool = True, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index a51835dcfa..15176c92b0 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -933,7 +933,7 @@ class TFPegasusDecoder(keras.layers.Layer): encoder_attention_mask: tf.Tensor | None = None, head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -1170,7 +1170,7 @@ class TFPegasusMainLayer(keras.layers.Layer): decoder_head_mask: tf.Tensor | None = None, cross_attn_head_mask: tf.Tensor | None = None, encoder_outputs: Optional[Union[Tuple, TFBaseModelOutput]] = None, - past_key_values: Tuple[Tuple[tf.Tensor]] = None, + past_key_values: Optional[Tuple[Tuple[tf.Tensor]]] = None, inputs_embeds: tf.Tensor | None = None, decoder_inputs_embeds: tf.Tensor | None = None, use_cache: Optional[bool] = None, diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 82d5713473..2edd4bef93 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -88,9 +88,9 @@ class PerceiverImageProcessor(BaseImageProcessor): def __init__( self, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index e9db5175b2..f052521236 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -213,7 +213,7 @@ class Pix2StructImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_normalize: bool = True, - patch_size: Dict[str, int] = None, + patch_size: Optional[Dict[str, int]] = None, max_patches: int = 2048, is_vqa: bool = False, **kwargs, diff --git a/src/transformers/models/pixtral/image_processing_pixtral.py b/src/transformers/models/pixtral/image_processing_pixtral.py index 8579cf08af..7cd9149e38 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral.py +++ b/src/transformers/models/pixtral/image_processing_pixtral.py @@ -175,8 +175,8 @@ class PixtralImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - patch_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -320,8 +320,8 @@ class PixtralImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, - patch_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + patch_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index cd4c4bb770..25c00cc5fe 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -103,11 +103,11 @@ class PoolFormerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: int = 0.9, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, rescale_factor: Union[int, float] = 1 / 255, do_rescale: bool = True, do_normalize: bool = True, @@ -214,11 +214,11 @@ class PoolFormerImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, crop_pct: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/pop2piano/processing_pop2piano.py b/src/transformers/models/pop2piano/processing_pop2piano.py index 3b839f8b1f..437d4efaef 100644 --- a/src/transformers/models/pop2piano/processing_pop2piano.py +++ b/src/transformers/models/pop2piano/processing_pop2piano.py @@ -52,7 +52,7 @@ class Pop2PianoProcessor(ProcessorMixin): def __call__( self, audio: Union[np.ndarray, List[float], List[np.ndarray]] = None, - sampling_rate: Union[int, List[int]] = None, + sampling_rate: Optional[Union[int, List[int]]] = None, steps_per_beat: int = 2, resample: Optional[bool] = True, notes: Union[List, TensorType] = None, diff --git a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py index 237be38fff..6ae239ab13 100644 --- a/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py +++ b/src/transformers/models/prompt_depth_anything/convert_prompt_depth_anything_to_hf.py @@ -18,6 +18,7 @@ https://github.com/DepthAnything/PromptDA""" import argparse import re from pathlib import Path +from typing import Optional import requests import torch @@ -130,7 +131,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { } -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ Convert old state dict keys to new keys using regex patterns. """ diff --git a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py index 2940932fe5..00fcd2b17a 100644 --- a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +++ b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py @@ -142,7 +142,7 @@ class PromptDepthAnythingImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, keep_aspect_ratio: bool = False, ensure_multiple_of: int = 1, diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index 75fcba6ea1..6915fff6f9 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -150,7 +150,7 @@ class PvtImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py index c90c90c72b..830f15df18 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py @@ -124,7 +124,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -170,7 +170,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): self, images: Union[ImageInput, VideoInput], do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -303,7 +303,7 @@ class Qwen2VLImageProcessor(BaseImageProcessor): images: ImageInput, videos: VideoInput = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, resample: PILImageResampling = None, diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py index d44ea279cb..350d12ca87 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py @@ -256,7 +256,7 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast): images: ImageInput, videos: VideoInput = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index 822347950a..1adce9f7b0 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -1375,7 +1375,7 @@ class RagTokenForGeneration(RagPreTrainedModel, GenerationMixin): doc_scores: Optional[torch.FloatTensor] = None, n_docs: Optional[int] = None, generation_config: Optional[GenerationConfig] = None, - prefix_allowed_tokens_fn: Callable[[int, torch.Tensor], List[int]] = None, + prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None, logits_processor: Optional[LogitsProcessorList] = LogitsProcessorList(), stopping_criteria: Optional[StoppingCriteriaList] = StoppingCriteriaList(), **kwargs, diff --git a/src/transformers/models/regnet/modeling_flax_regnet.py b/src/transformers/models/regnet/modeling_flax_regnet.py index 4a4e0a424a..8d2921ea14 100644 --- a/src/transformers/models/regnet/modeling_flax_regnet.py +++ b/src/transformers/models/regnet/modeling_flax_regnet.py @@ -598,7 +598,7 @@ class FlaxRegNetPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, train: bool = False, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index ef3250a712..ac4ee0a970 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -239,7 +239,7 @@ class RemBertSelfAttention(nn.Module): head_mask: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, - past_key_value: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, output_attentions: bool = False, ) -> Tuple: mixed_query_layer = self.query(hidden_states) diff --git a/src/transformers/models/resnet/modeling_flax_resnet.py b/src/transformers/models/resnet/modeling_flax_resnet.py index aa6c84c0fd..e6aba34cbe 100644 --- a/src/transformers/models/resnet/modeling_flax_resnet.py +++ b/src/transformers/models/resnet/modeling_flax_resnet.py @@ -489,7 +489,7 @@ class FlaxResNetPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, train: bool = False, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, diff --git a/src/transformers/models/roberta/modeling_flax_roberta.py b/src/transformers/models/roberta/modeling_flax_roberta.py index 2beb0a06b8..3eb6d53929 100644 --- a/src/transformers/models/roberta/modeling_flax_roberta.py +++ b/src/transformers/models/roberta/modeling_flax_roberta.py @@ -824,13 +824,13 @@ class FlaxRobertaPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index f2dfa19a6a..8b77f2bd63 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -1040,7 +1040,7 @@ class RobertaForCausalLM(RobertaPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py index 1e691c047b..ca90ce96fb 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py @@ -828,13 +828,13 @@ class FlaxRobertaPreLayerNormPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py index 6b0c40b222..35c6550ff9 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py @@ -894,7 +894,7 @@ class RobertaPreLayerNormForCausalLM(RobertaPreLayerNormPreTrainedModel, Generat encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/roformer/modeling_flax_roformer.py b/src/transformers/models/roformer/modeling_flax_roformer.py index f47146f16b..c320407813 100644 --- a/src/transformers/models/roformer/modeling_flax_roformer.py +++ b/src/transformers/models/roformer/modeling_flax_roformer.py @@ -648,7 +648,7 @@ class FlaxRoFormerPreTrainedModel(FlaxPreTrainedModel): attention_mask=None, token_type_ids=None, head_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr.py b/src/transformers/models/rt_detr/image_processing_rt_detr.py index e458de3794..cdefc9ff94 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr.py @@ -440,13 +440,13 @@ class RTDetrImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = False, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: bool = True, do_pad: bool = False, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py index 51372b74e4..1a86ac02af 100644 --- a/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py +++ b/src/transformers/models/rt_detr_v2/convert_rt_detr_v2_weights_to_hf.py @@ -18,6 +18,7 @@ import argparse import json import re from pathlib import Path +from typing import Optional import requests import torch @@ -159,7 +160,7 @@ ORIGINAL_TO_CONVERTED_KEY_MAPPING = { } -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): # Use the mapping to rename keys for original_key, converted_key in ORIGINAL_TO_CONVERTED_KEY_MAPPING.items(): for key in list(state_dict_keys.keys()): diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index 9288cc1485..3142d9d198 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -118,8 +118,8 @@ class SamImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, - mask_size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, + mask_size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -294,7 +294,7 @@ class SamImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -349,7 +349,7 @@ class SamImageProcessor(BaseImageProcessor): self, segmentation_map: ImageInput, do_resize: Optional[bool] = None, - mask_size: Dict[str, int] = None, + mask_size: Optional[Dict[str, int]] = None, do_pad: Optional[bool] = None, mask_pad_size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 79cbe47482..84fec78a2a 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -99,7 +99,7 @@ class SegformerImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -222,7 +222,7 @@ class SegformerImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -264,7 +264,7 @@ class SegformerImageProcessor(BaseImageProcessor): segmentation_map: ImageInput, do_reduce_labels: Optional[bool] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single mask.""" @@ -437,7 +437,7 @@ class SegformerImageProcessor(BaseImageProcessor): return BatchFeature(data=data, tensor_type=return_tensors) # Copied from transformers.models.beit.image_processing_beit.BeitImageProcessor.post_process_semantic_segmentation with Beit->Segformer - def post_process_semantic_segmentation(self, outputs, target_sizes: List[Tuple] = None): + def post_process_semantic_segmentation(self, outputs, target_sizes: Optional[List[Tuple]] = None): """ Converts the output of [`SegformerForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch. diff --git a/src/transformers/models/seggpt/image_processing_seggpt.py b/src/transformers/models/seggpt/image_processing_seggpt.py index 26c7c1f47a..b469586de8 100644 --- a/src/transformers/models/seggpt/image_processing_seggpt.py +++ b/src/transformers/models/seggpt/image_processing_seggpt.py @@ -247,7 +247,7 @@ class SegGptImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, @@ -394,7 +394,7 @@ class SegGptImageProcessor(BaseImageProcessor): prompt_images: Optional[ImageInput] = None, prompt_masks: Optional[ImageInput] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 7ec6c36d39..ae9dabb3ed 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -82,7 +82,7 @@ class SiglipImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, @@ -112,7 +112,7 @@ class SiglipImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm.py b/src/transformers/models/smolvlm/image_processing_smolvlm.py index abf9353fd4..92b432de0e 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm.py @@ -292,10 +292,10 @@ class SmolVLMImageProcessor(BaseImageProcessor): self, do_convert_rgb: bool = True, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.LANCZOS, do_image_splitting: bool = True, - max_image_size: Dict[str, int] = None, + max_image_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_normalize: bool = True, diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index 772b1d23bd..e266460346 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -474,7 +474,7 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): return_dict: Optional[bool] = None, train: bool = False, freeze_feature_encoder: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -542,12 +542,12 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -671,7 +671,7 @@ class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): return_dict: Optional[bool] = None, train: bool = False, freeze_feature_encoder: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/superglue/configuration_superglue.py b/src/transformers/models/superglue/configuration_superglue.py index fe301442d6..caebe86d9c 100644 --- a/src/transformers/models/superglue/configuration_superglue.py +++ b/src/transformers/models/superglue/configuration_superglue.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING, List, Optional from ...configuration_utils import PretrainedConfig from ...utils import logging @@ -73,8 +73,8 @@ class SuperGlueConfig(PretrainedConfig): self, keypoint_detector_config: "SuperPointConfig" = None, hidden_size: int = 256, - keypoint_encoder_sizes: List[int] = None, - gnn_layers_types: List[str] = None, + keypoint_encoder_sizes: Optional[List[int]] = None, + gnn_layers_types: Optional[List[str]] = None, num_attention_heads: int = 4, sinkhorn_iterations: int = 100, matching_threshold: float = 0.0, diff --git a/src/transformers/models/superglue/image_processing_superglue.py b/src/transformers/models/superglue/image_processing_superglue.py index 4a858db8f4..c2e1f93626 100644 --- a/src/transformers/models/superglue/image_processing_superglue.py +++ b/src/transformers/models/superglue/image_processing_superglue.py @@ -161,7 +161,7 @@ class SuperGlueImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: float = 1 / 255, @@ -223,7 +223,7 @@ class SuperGlueImageProcessor(BaseImageProcessor): self, images, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 77802d2e5c..e0835934df 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -122,7 +122,7 @@ class SuperPointImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, do_grayscale: bool = False, @@ -181,7 +181,7 @@ class SuperPointImageProcessor(BaseImageProcessor): self, images, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_grayscale: Optional[bool] = None, diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index be76fe1b77..1fa8da5c2d 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -993,7 +993,7 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1078,7 +1078,7 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1134,12 +1134,12 @@ class FlaxT5PreTrainedModel(FlaxPreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1462,7 +1462,7 @@ class FlaxT5EncoderModel(FlaxT5PreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1612,12 +1612,12 @@ class FlaxT5ForConditionalGeneration(FlaxT5PreTrainedModel): encoder_outputs, encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/textnet/image_processing_textnet.py b/src/transformers/models/textnet/image_processing_textnet.py index 74806a0556..f75d1db097 100644 --- a/src/transformers/models/textnet/image_processing_textnet.py +++ b/src/transformers/models/textnet/image_processing_textnet.py @@ -94,11 +94,11 @@ class TextNetImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = False, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -204,7 +204,7 @@ class TextNetImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: Optional[int] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index be19d893a6..81129a54f1 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -136,14 +136,14 @@ class TvpImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, - pad_size: Dict[str, int] = None, + pad_size: Optional[Dict[str, int]] = None, constant_values: Union[float, Iterable[float]] = 0, pad_mode: PaddingMode = PaddingMode.CONSTANT, do_normalize: bool = True, @@ -219,7 +219,7 @@ class TvpImageProcessor(BaseImageProcessor): def pad_image( self, image: np.ndarray, - pad_size: Dict[str, int] = None, + pad_size: Optional[Dict[str, int]] = None, constant_values: Union[float, Iterable[float]] = 0, pad_mode: PaddingMode = PaddingMode.CONSTANT, data_format: Optional[Union[str, ChannelDimension]] = None, @@ -267,15 +267,15 @@ class TvpImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_pad: bool = True, - pad_size: Dict[str, int] = None, - constant_values: Union[float, Iterable[float]] = None, + pad_size: Optional[Dict[str, int]] = None, + constant_values: Optional[Union[float, Iterable[float]]] = None, pad_mode: PaddingMode = None, do_normalize: Optional[bool] = None, do_flip_channel_order: Optional[bool] = None, @@ -341,15 +341,15 @@ class TvpImageProcessor(BaseImageProcessor): self, videos: Union[ImageInput, List[ImageInput], List[List[ImageInput]]], do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_pad: Optional[bool] = None, - pad_size: Dict[str, int] = None, - constant_values: Union[float, Iterable[float]] = None, + pad_size: Optional[Dict[str, int]] = None, + constant_values: Optional[Union[float, Iterable[float]]] = None, pad_mode: PaddingMode = None, do_normalize: Optional[bool] = None, do_flip_channel_order: Optional[bool] = None, diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py index 91a0fc10b1..d7e2058152 100644 --- a/src/transformers/models/tvp/modeling_tvp.py +++ b/src/transformers/models/tvp/modeling_tvp.py @@ -919,7 +919,7 @@ class TvpForVideoGrounding(TvpPreTrainedModel): input_ids: Optional[torch.LongTensor] = None, pixel_values: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.LongTensor] = None, - labels: Tuple[torch.Tensor] = None, + labels: Optional[Tuple[torch.Tensor]] = None, head_mask: Optional[torch.FloatTensor] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py index b2c43815f5..5fb3c0ce8d 100644 --- a/src/transformers/models/udop/modeling_udop.py +++ b/src/transformers/models/udop/modeling_udop.py @@ -1716,9 +1716,9 @@ class UdopModel(UdopPreTrainedModel): self, input_ids: Optional[Tensor] = None, attention_mask: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, decoder_input_ids: Optional[Tensor] = None, decoder_attention_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, @@ -1892,9 +1892,9 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin): self, input_ids: Optional[Tensor] = None, attention_mask: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, decoder_input_ids: Optional[Tensor] = None, decoder_attention_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, @@ -2104,10 +2104,10 @@ class UdopEncoderModel(UdopPreTrainedModel): def forward( self, input_ids: Optional[Tensor] = None, - bbox: Dict[str, Any] = None, + bbox: Optional[Dict[str, Any]] = None, attention_mask: Optional[Tensor] = None, pixel_values: Optional[Tensor] = None, - visual_bbox: Dict[str, Any] = None, + visual_bbox: Optional[Dict[str, Any]] = None, head_mask: Optional[Tensor] = None, inputs_embeds: Optional[Tensor] = None, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/udop/tokenization_udop.py b/src/transformers/models/udop/tokenization_udop.py index 08eccaec7b..86ae0744d5 100644 --- a/src/transformers/models/udop/tokenization_udop.py +++ b/src/transformers/models/udop/tokenization_udop.py @@ -511,7 +511,7 @@ class UdopTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair_target: Optional[ @@ -545,7 +545,7 @@ class UdopTokenizer(PreTrainedTokenizer): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/udop/tokenization_udop_fast.py b/src/transformers/models/udop/tokenization_udop_fast.py index 9992da7bdd..337617f721 100644 --- a/src/transformers/models/udop/tokenization_udop_fast.py +++ b/src/transformers/models/udop/tokenization_udop_fast.py @@ -243,7 +243,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, text_target: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, text_pair_target: Optional[ @@ -278,7 +278,7 @@ class UdopTokenizerFast(PreTrainedTokenizerFast): self, text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]], text_pair: Optional[Union[PreTokenizedInput, List[PreTokenizedInput]]] = None, - boxes: Union[List[List[int]], List[List[List[int]]]] = None, + boxes: Optional[Union[List[List[int]], List[List[List[int]]]]] = None, word_labels: Optional[Union[List[int], List[List[int]]]] = None, add_special_tokens: bool = True, padding: Union[bool, str, PaddingStrategy] = False, diff --git a/src/transformers/models/video_llava/image_processing_video_llava.py b/src/transformers/models/video_llava/image_processing_video_llava.py index 8b7f489791..6597241683 100644 --- a/src/transformers/models/video_llava/image_processing_video_llava.py +++ b/src/transformers/models/video_llava/image_processing_video_llava.py @@ -90,10 +90,10 @@ class VideoLlavaImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BICUBIC, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -172,10 +172,10 @@ class VideoLlavaImageProcessor(BaseImageProcessor): @filter_out_non_signature_kwargs() def preprocess( self, - images: List[ImageInput] = None, - videos: List[VideoInput] = None, + images: Optional[List[ImageInput]] = None, + videos: Optional[List[VideoInput]] = None, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, crop_size: Optional[int] = None, diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index a7043d5ada..fa0459fabb 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -105,10 +105,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -181,10 +181,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, @@ -239,10 +239,10 @@ class VideoMAEImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index d4ac8cca32..4bd7ac55ee 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -165,7 +165,7 @@ class ViltImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, size_divisor: int = 32, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index 659afb976c..4d96a68bc1 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -400,7 +400,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -473,12 +473,12 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): encoder_outputs, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -607,7 +607,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index 57afc18988..020efb3c5c 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -273,7 +273,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): attention_mask=None, position_ids=None, token_type_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -322,7 +322,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): attention_mask=None, position_ids=None, token_type_ids=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False, ): @@ -379,7 +379,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel): ) def get_image_features( - self, pixel_values, params: dict = None, dropout_rng: jax.random.PRNGKey = None, train=False + self, pixel_values, params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train=False ): r""" Args: diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index ade7495b1d..654e56ab91 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -155,7 +155,7 @@ class ViTImageProcessor(BaseImageProcessor): self, images: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, diff --git a/src/transformers/models/vit/modeling_flax_vit.py b/src/transformers/models/vit/modeling_flax_vit.py index 5cf3477b5d..8e86248cde 100644 --- a/src/transformers/models/vit/modeling_flax_vit.py +++ b/src/transformers/models/vit/modeling_flax_vit.py @@ -474,7 +474,7 @@ class FlaxViTPreTrainedModel(FlaxPreTrainedModel): def __call__( self, pixel_values, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, diff --git a/src/transformers/models/vitpose/convert_vitpose_to_hf.py b/src/transformers/models/vitpose/convert_vitpose_to_hf.py index e4666751a1..e9bbad2035 100644 --- a/src/transformers/models/vitpose/convert_vitpose_to_hf.py +++ b/src/transformers/models/vitpose/convert_vitpose_to_hf.py @@ -22,6 +22,7 @@ Notebook to get the original logits: https://colab.research.google.com/drive/1QD import argparse import os import re +from typing import Optional import requests import torch @@ -160,7 +161,7 @@ def get_config(model_name): return config -def convert_old_keys_to_new_keys(state_dict_keys: dict = None): +def convert_old_keys_to_new_keys(state_dict_keys: Optional[dict] = None): """ This function should be applied only once, on the concatenated keys to efficiently rename using the key mappings. diff --git a/src/transformers/models/vitpose/image_processing_vitpose.py b/src/transformers/models/vitpose/image_processing_vitpose.py index 8320ad2d6d..fc1a871939 100644 --- a/src/transformers/models/vitpose/image_processing_vitpose.py +++ b/src/transformers/models/vitpose/image_processing_vitpose.py @@ -353,7 +353,7 @@ class VitPoseImageProcessor(BaseImageProcessor): def __init__( self, do_affine_transform: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -425,7 +425,7 @@ class VitPoseImageProcessor(BaseImageProcessor): images: ImageInput, boxes: Union[List[List[float]], np.ndarray], do_affine_transform: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, do_normalize: Optional[bool] = None, diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index abf0a5808e..7e984f0251 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -109,10 +109,10 @@ class VivitImageProcessor(BaseImageProcessor): def __init__( self, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_center_crop: bool = True, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 127.5, offset: bool = True, @@ -228,10 +228,10 @@ class VivitImageProcessor(BaseImageProcessor): self, image: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, offset: Optional[bool] = None, @@ -291,10 +291,10 @@ class VivitImageProcessor(BaseImageProcessor): self, videos: ImageInput, do_resize: Optional[bool] = None, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = None, do_center_crop: Optional[bool] = None, - crop_size: Dict[str, int] = None, + crop_size: Optional[Dict[str, int]] = None, do_rescale: Optional[bool] = None, rescale_factor: Optional[float] = None, offset: Optional[bool] = None, diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 5470762050..ee188888d7 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -891,7 +891,7 @@ class FlaxWav2Vec2PreTrainedModel(FlaxPreTrainedModel): input_values, attention_mask=None, mask_time_indices=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, @@ -1327,7 +1327,7 @@ class FlaxWav2Vec2ForPreTraining(FlaxWav2Vec2PreTrainedModel): attention_mask=None, mask_time_indices=None, gumbel_temperature: int = 1, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, gumbel_rng: jax.random.PRNGKey = None, train: bool = False, diff --git a/src/transformers/models/whisper/modeling_flax_whisper.py b/src/transformers/models/whisper/modeling_flax_whisper.py index e3c7e9d1c6..decc393dfc 100644 --- a/src/transformers/models/whisper/modeling_flax_whisper.py +++ b/src/transformers/models/whisper/modeling_flax_whisper.py @@ -867,7 +867,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): def __init__( self, config: WhisperConfig, - input_shape: Tuple[int] = None, + input_shape: Optional[Tuple[int]] = None, seed: int = 0, dtype: jnp.dtype = jnp.float32, _do_init: bool = True, @@ -970,7 +970,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, **kwargs, ): @@ -1025,12 +1025,12 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1144,7 +1144,7 @@ class FlaxWhisperPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -1278,12 +1278,12 @@ class FlaxWhisperForConditionalGeneration(FlaxWhisperPreTrainedModel): encoder_attention_mask: Optional[jnp.ndarray] = None, decoder_attention_mask: Optional[jnp.ndarray] = None, decoder_position_ids: Optional[jnp.ndarray] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, ): r""" @@ -1631,7 +1631,7 @@ class FlaxWhisperForAudioClassification(FlaxWhisperPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, + params: Optional[dict] = None, dropout_rng: PRNGKey = None, **kwargs, ): diff --git a/src/transformers/models/xglm/modeling_flax_xglm.py b/src/transformers/models/xglm/modeling_flax_xglm.py index 3b7a933e4d..96f797ea58 100644 --- a/src/transformers/models/xglm/modeling_flax_xglm.py +++ b/src/transformers/models/xglm/modeling_flax_xglm.py @@ -619,8 +619,8 @@ class FlaxXGLMPreTrainedModel(FlaxPreTrainedModel): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, train: bool = False, - params: dict = None, - past_key_values: dict = None, + params: Optional[dict] = None, + past_key_values: Optional[dict] = None, dropout_rng: PRNGKey = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions diff --git a/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py index 63432be06d..b7fdeda1b2 100644 --- a/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py @@ -831,13 +831,13 @@ class FlaxXLMRobertaPreTrainedModel(FlaxPreTrainedModel): head_mask=None, encoder_hidden_states=None, encoder_attention_mask=None, - params: dict = None, + params: Optional[dict] = None, dropout_rng: jax.random.PRNGKey = None, train: bool = False, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - past_key_values: dict = None, + past_key_values: Optional[dict] = None, ): output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py index 1fe5823c20..dcc9bf2344 100644 --- a/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_xlm_roberta.py @@ -1043,7 +1043,7 @@ class XLMRobertaForCausalLM(XLMRobertaPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/xmod/modeling_xmod.py b/src/transformers/models/xmod/modeling_xmod.py index 21aad7188e..9465bf7a65 100644 --- a/src/transformers/models/xmod/modeling_xmod.py +++ b/src/transformers/models/xmod/modeling_xmod.py @@ -996,7 +996,7 @@ class XmodForCausalLM(XmodPreTrainedModel, GenerationMixin): encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.FloatTensor] = None, labels: Optional[torch.LongTensor] = None, - past_key_values: Tuple[Tuple[torch.FloatTensor]] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 2c1f0d1d2b..681ed17f2f 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -668,7 +668,7 @@ def compute_segments( mask_threshold: float = 0.5, overlap_mask_area_threshold: float = 0.8, label_ids_to_fuse: Optional[Set[int]] = None, - target_size: Tuple[int, int] = None, + target_size: Optional[Tuple[int, int]] = None, ): height = mask_probs.shape[1] if target_size is None else target_size[0] width = mask_probs.shape[2] if target_size is None else target_size[1] @@ -777,13 +777,13 @@ class YolosImageProcessor(BaseImageProcessor): self, format: Union[str, AnnotationFormat] = AnnotationFormat.COCO_DETECTION, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, - image_mean: Union[float, List[float]] = None, - image_std: Union[float, List[float]] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, do_convert_annotations: Optional[bool] = None, do_pad: bool = True, pad_size: Optional[Dict[str, int]] = None, diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth.py b/src/transformers/models/zoedepth/image_processing_zoedepth.py index f839548320..47920c29c6 100644 --- a/src/transformers/models/zoedepth/image_processing_zoedepth.py +++ b/src/transformers/models/zoedepth/image_processing_zoedepth.py @@ -154,7 +154,7 @@ class ZoeDepthImageProcessor(BaseImageProcessor): image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, do_resize: bool = True, - size: Dict[str, int] = None, + size: Optional[Dict[str, int]] = None, resample: PILImageResampling = PILImageResampling.BILINEAR, keep_aspect_ratio: bool = True, ensure_multiple_of: int = 32, diff --git a/src/transformers/onnx/config.py b/src/transformers/onnx/config.py index 460ee93299..bba2592f3c 100644 --- a/src/transformers/onnx/config.py +++ b/src/transformers/onnx/config.py @@ -108,7 +108,9 @@ class OnnxConfig(ABC): "speech2seq-lm": OrderedDict({"logits": {0: "batch", 1: "sequence"}}), } - def __init__(self, config: "PretrainedConfig", task: str = "default", patching_specs: List[PatchingSpec] = None): + def __init__( + self, config: "PretrainedConfig", task: str = "default", patching_specs: Optional[List[PatchingSpec]] = None + ): self._config = config if task not in self._tasks_to_common_outputs: diff --git a/src/transformers/pipelines/document_question_answering.py b/src/transformers/pipelines/document_question_answering.py index 899a7cc539..96b0565d78 100644 --- a/src/transformers/pipelines/document_question_answering.py +++ b/src/transformers/pipelines/document_question_answering.py @@ -202,7 +202,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): self, image: Union["Image.Image", str], question: Optional[str] = None, - word_boxes: Tuple[str, List[float]] = None, + word_boxes: Optional[Tuple[str, List[float]]] = None, **kwargs, ): """ @@ -283,7 +283,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline): padding="do_not_pad", doc_stride=None, max_seq_len=None, - word_boxes: Tuple[str, List[float]] = None, + word_boxes: Optional[Tuple[str, List[float]]] = None, lang=None, tesseract_config="", timeout=None, diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py index 616eb8def7..940dea10d7 100644 --- a/src/transformers/pipelines/video_classification.py +++ b/src/transformers/pipelines/video_classification.py @@ -13,7 +13,7 @@ # limitations under the License. import warnings from io import BytesIO -from typing import List, Union +from typing import List, Optional, Union import requests @@ -77,7 +77,7 @@ class VideoClassificationPipeline(Pipeline): postprocess_params["function_to_apply"] = "softmax" return preprocess_params, {}, postprocess_params - def __call__(self, inputs: Union[str, List[str]] = None, **kwargs): + def __call__(self, inputs: Optional[Union[str, List[str]]] = None, **kwargs): """ Assign labels to the video(s) passed as inputs. diff --git a/src/transformers/pipelines/visual_question_answering.py b/src/transformers/pipelines/visual_question_answering.py index 6d600c9eaf..83dbd8f215 100644 --- a/src/transformers/pipelines/visual_question_answering.py +++ b/src/transformers/pipelines/visual_question_answering.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List, Optional, Union from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging from .base import Pipeline, build_pipeline_init_args @@ -79,7 +79,7 @@ class VisualQuestionAnsweringPipeline(Pipeline): def __call__( self, image: Union["Image.Image", str, List["Image.Image"], List[str], "KeyDataset"], - question: Union[str, List[str]] = None, + question: Optional[Union[str, List[str]]] = None, **kwargs, ): r""" diff --git a/src/transformers/pipelines/zero_shot_object_detection.py b/src/transformers/pipelines/zero_shot_object_detection.py index ce8da7340b..5385d84db7 100644 --- a/src/transformers/pipelines/zero_shot_object_detection.py +++ b/src/transformers/pipelines/zero_shot_object_detection.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends from .base import ChunkPipeline, build_pipeline_init_args @@ -65,7 +65,7 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline): def __call__( self, image: Union[str, "Image.Image", List[Dict[str, Any]]], - candidate_labels: Union[str, List[str]] = None, + candidate_labels: Optional[Union[str, List[str]]] = None, **kwargs, ): """ diff --git a/src/transformers/trainer_utils.py b/src/transformers/trainer_utils.py index 4d3dd6d6bb..49feecf694 100644 --- a/src/transformers/trainer_utils.py +++ b/src/transformers/trainer_utils.py @@ -792,7 +792,7 @@ def number_of_arguments(func): def find_executable_batch_size( - function: callable = None, starting_batch_size: int = 128, auto_find_batch_size: bool = False + function: Optional[callable] = None, starting_batch_size: int = 128, auto_find_batch_size: bool = False ): """ Args: diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index f420a3e12d..9654d5d1ff 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -1877,7 +1877,7 @@ class _LazyModule(ModuleType): module_file: str, import_structure: IMPORT_STRUCTURE_T, module_spec: Optional[importlib.machinery.ModuleSpec] = None, - extra_objects: Dict[str, object] = None, + extra_objects: Optional[Dict[str, object]] = None, ): super().__init__(name) @@ -2412,7 +2412,7 @@ def spread_import_structure(nested_import_structure): @lru_cache() -def define_import_structure(module_path: str, prefix: str = None) -> IMPORT_STRUCTURE_T: +def define_import_structure(module_path: str, prefix: Optional[str] = None) -> IMPORT_STRUCTURE_T: """ This method takes a module_path as input and creates an import structure digestible by a _LazyModule. diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py index 816ff9ae55..72b3837142 100644 --- a/src/transformers/utils/quantization_config.py +++ b/src/transformers/utils/quantization_config.py @@ -1308,13 +1308,13 @@ class CompressedTensorsConfig(QuantizationConfigMixin): def __init__( self, - config_groups: Dict[str, Union["QuantizationScheme", List[str]]] = None, # noqa: F821 + config_groups: Optional[Dict[str, Union["QuantizationScheme", List[str]]]] = None, # noqa: F821 format: str = "dense", quantization_status: "QuantizationStatus" = "initialized", # noqa: F821 kv_cache_scheme: Optional["QuantizationArgs"] = None, # noqa: F821 global_compression_ratio: Optional[float] = None, ignore: Optional[List[str]] = None, - sparsity_config: Dict[str, Any] = None, + sparsity_config: Optional[Dict[str, Any]] = None, quant_method: str = "compressed-tensors", run_compressed: bool = True, **kwargs, diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index b5215c1e9f..b2b96613e6 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -19,6 +19,7 @@ import os import unittest from copy import deepcopy from functools import partial +from typing import Optional import datasets from parameterized import parameterized @@ -1252,8 +1253,8 @@ class TestDeepSpeedWithLauncher(TestCasePlus): do_eval: bool = True, quality_checks: bool = True, fp32: bool = False, - extra_args_str: str = None, - remove_args_str: str = None, + extra_args_str: Optional[str] = None, + remove_args_str: Optional[str] = None, ): # we are doing quality testing so using a small real model output_dir = self.run_trainer( @@ -1285,8 +1286,8 @@ class TestDeepSpeedWithLauncher(TestCasePlus): do_eval: bool = True, distributed: bool = True, fp32: bool = False, - extra_args_str: str = None, - remove_args_str: str = None, + extra_args_str: Optional[str] = None, + remove_args_str: Optional[str] = None, ): max_len = 32 data_dir = self.test_file_dir / "../fixtures/tests_samples/wmt_en_ro" diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index afa8be4b8f..dc087559d4 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -17,6 +17,7 @@ import os import re import sys from pathlib import Path +from typing import Optional from unittest.mock import patch from parameterized import parameterized @@ -270,13 +271,13 @@ class TestTrainerExt(TestCasePlus): learning_rate: float = 3e-3, optim: str = "adafactor", distributed: bool = False, - extra_args_str: str = None, + extra_args_str: Optional[str] = None, eval_steps: int = 0, predict_with_generate: bool = True, do_train: bool = True, do_eval: bool = True, do_predict: bool = True, - n_gpus_to_use: int = None, + n_gpus_to_use: Optional[int] = None, ): data_dir = self.test_file_dir / "../fixtures/tests_samples/wmt_en_ro" output_dir = self.get_auto_remove_tmp_dir() diff --git a/tests/models/bridgetower/test_image_processing_bridgetower.py b/tests/models/bridgetower/test_image_processing_bridgetower.py index 388bb65f69..12d2d03b77 100644 --- a/tests/models/bridgetower/test_image_processing_bridgetower.py +++ b/tests/models/bridgetower/test_image_processing_bridgetower.py @@ -41,7 +41,7 @@ class BridgeTowerImageProcessingTester: self, parent, do_resize: bool = True, - size: dict[str, int] = None, + size: Optional[dict[str, int]] = None, size_divisor: int = 32, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, diff --git a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py index 5c5ff13153..ad00eab111 100644 --- a/tests/models/patchtsmixer/test_modeling_patchtsmixer.py +++ b/tests/models/patchtsmixer/test_modeling_patchtsmixer.py @@ -92,12 +92,12 @@ class PatchTSMixerModelTester: head_dropout: float = 0.2, # forecast related prediction_length: int = 16, - out_channels: int = None, + out_channels: Optional[int] = None, # Classification/regression related # num_labels: int = 3, num_targets: int = 3, - output_range: list = None, - head_aggregation: str = None, + output_range: Optional[list] = None, + head_aggregation: Optional[str] = None, # Trainer related batch_size=13, is_training=True, diff --git a/tests/models/tvp/test_image_processing_tvp.py b/tests/models/tvp/test_image_processing_tvp.py index 6183c2e163..20f4ed7369 100644 --- a/tests/models/tvp/test_image_processing_tvp.py +++ b/tests/models/tvp/test_image_processing_tvp.py @@ -41,12 +41,12 @@ class TvpImageProcessingTester: do_resize: bool = True, size: dict[str, int] = {"longest_edge": 40}, do_center_crop: bool = False, - crop_size: dict[str, int] = None, + crop_size: Optional[dict[str, int]] = None, do_rescale: bool = False, rescale_factor: Union[int, float] = 1 / 255, do_pad: bool = True, pad_size: dict[str, int] = {"height": 80, "width": 80}, - fill: int = None, + fill: Optional[int] = None, pad_mode: PaddingMode = None, do_normalize: bool = True, image_mean: Optional[Union[float, list[float]]] = [0.48145466, 0.4578275, 0.40821073], diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index 0e2ab52203..356473d11a 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -28,7 +28,7 @@ from collections import OrderedDict from functools import lru_cache from itertools import takewhile from pathlib import Path -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any, Optional, Union from parameterized import parameterized @@ -173,7 +173,7 @@ def _test_subword_regularization_tokenizer(in_queue, out_queue, timeout): def check_subword_sampling( tokenizer: PreTrainedTokenizer, - text: str = None, + text: Optional[str] = None, test_sentencepiece_ignore_case: bool = True, ) -> None: """ @@ -321,9 +321,9 @@ class TokenizerTesterMixin: self, expected_encoding: dict, model_name: str, - revision: str = None, - sequences: list[str] = None, - decode_kwargs: dict[str, Any] = None, + revision: Optional[str] = None, + sequences: Optional[list[str]] = None, + decode_kwargs: Optional[dict[str, Any]] = None, padding: bool = True, ): """ diff --git a/utils/check_copies.py b/utils/check_copies.py index 0dffa79a32..9b392f1367 100644 --- a/utils/check_copies.py +++ b/utils/check_copies.py @@ -390,7 +390,7 @@ def split_code_into_blocks( def find_code_in_transformers( - object_name: str, base_path: str = None, return_indices: bool = False + object_name: str, base_path: Optional[str] = None, return_indices: bool = False ) -> Union[str, Tuple[List[str], int, int]]: """ Find and return the source code of an object. @@ -491,7 +491,7 @@ def replace_code(code: str, replace_pattern: str) -> str: return code -def find_code_and_splits(object_name: str, base_path: str, buffer: dict = None): +def find_code_and_splits(object_name: str, base_path: str, buffer: Optional[dict] = None): """Find the code of an object (specified by `object_name`) and split it into blocks. Args: @@ -638,7 +638,9 @@ def check_codes_match(observed_code: str, theoretical_code: str) -> Optional[int diff_index += 1 -def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = None) -> Optional[List[Tuple[str, int]]]: +def is_copy_consistent( + filename: str, overwrite: bool = False, buffer: Optional[dict] = None +) -> Optional[List[Tuple[str, int]]]: """ Check if the code commented as a copy in a file matches the original. @@ -831,7 +833,7 @@ def is_copy_consistent(filename: str, overwrite: bool = False, buffer: dict = No return diffs -def check_copies(overwrite: bool = False, file: str = None): +def check_copies(overwrite: bool = False, file: Optional[str] = None): """ Check every file is copy-consistent with the original. Also check the model list in the main README and other READMEs are consistent. diff --git a/utils/notification_service.py b/utils/notification_service.py index f7f3d16e55..17e3b9d498 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -107,7 +107,7 @@ class Message: ci_title: str, model_results: Dict, additional_results: Dict, - selected_warnings: List = None, + selected_warnings: Optional[List] = None, prev_ci_artifacts=None, ): self.title = title @@ -856,7 +856,7 @@ def retrieve_available_artifacts(): def __str__(self): return self.name - def add_path(self, path: str, gpu: str = None): + def add_path(self, path: str, gpu: Optional[str] = None): self.paths.append({"name": self.name, "path": path, "gpu": gpu}) _available_artifacts: Dict[str, Artifact] = {} diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index e2a256dfd6..afdd35b9a8 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -59,7 +59,7 @@ import re import tempfile from contextlib import contextmanager from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union from git import Repo @@ -621,7 +621,7 @@ _re_single_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S* _re_multi_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S*)\s+import\s+\(([^\)]+)\)") -def extract_imports(module_fname: str, cache: Dict[str, List[str]] = None) -> List[str]: +def extract_imports(module_fname: str, cache: Optional[Dict[str, List[str]]] = None) -> List[str]: """ Get the imports a given module makes. @@ -703,7 +703,7 @@ def extract_imports(module_fname: str, cache: Dict[str, List[str]] = None) -> Li return result -def get_module_dependencies(module_fname: str, cache: Dict[str, List[str]] = None) -> List[str]: +def get_module_dependencies(module_fname: str, cache: Optional[Dict[str, List[str]]] = None) -> List[str]: """ Refines the result of `extract_imports` to remove subfolders and get a proper list of module filenames: if a file as an import `from utils import Foo, Bar`, with `utils` being a subfolder containing many files, this will traverse @@ -953,7 +953,7 @@ def create_reverse_dependency_map() -> Dict[str, List[str]]: def create_module_to_test_map( - reverse_map: Dict[str, List[str]] = None, filter_models: bool = False + reverse_map: Optional[Dict[str, List[str]]] = None, filter_models: bool = False ) -> Dict[str, List[str]]: """ Extract the tests from the reverse_dependency_map and potentially filters the model tests.