From e75cb0cb3c5fef887abea6f099252e59a659af9d Mon Sep 17 00:00:00 2001 From: Shauray Singh <39147312+shauray8@users.noreply.github.com> Date: Thu, 20 Jul 2023 19:43:13 +0530 Subject: [PATCH] fix type annotations for arguments in training_args (#24550) * testing * example script * fix typehinting * some tests * make test * optional update * Union of arguments * does this fix the issue * remove reports * set default to False * documentation change * None support * does not need None * Fix typing annotations for FSDP and DeepSpeed in TrainingArguments (#24549) * Fix typing annotations for FSDP and DeepSpeed in TrainingArguments * Change dict to Dict * Revert "Fix typing annotations for FSDP and DeepSpeed in TrainingArguments" (#24574) Revert "Fix typing annotations for FSDP and DeepSpeed in TrainingArguments (#24549)" This reverts commit c5e29d4381d4b9739e6cb427adbca87fbb43a3ad. * Fix typing annotations for FSDP and DeepSpeed in TrainingArguments (#24549) * Fix typing annotations for FSDP and DeepSpeed in TrainingArguments * Change dict to Dict * merge * hacky fix * fixup --------- Co-authored-by: Max Ryabinin Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/training_args.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 42a9ee2fbc..7be24cc315 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -406,7 +406,7 @@ class TrainingArguments: When resuming training, whether or not to skip the epochs and batches to get the data loading at the same stage as in the previous training. If set to `True`, the training will begin faster (as that skipping step can take a long time) but will not yield the same results as the interrupted training would have. - sharded_ddp (`bool`, `str` or list of [`~trainer_utils.ShardedDDPOption`], *optional*, defaults to `False`): + sharded_ddp (`bool`, `str` or list of [`~trainer_utils.ShardedDDPOption`], *optional*, defaults to `''`): Use Sharded DDP training from [FairScale](https://github.com/facebookresearch/fairscale) (in distributed training only). This is an experimental feature. @@ -421,7 +421,7 @@ class TrainingArguments: If a string is passed, it will be split on space. If a bool is passed, it will be converted to an empty list for `False` and `["simple"]` for `True`. - fsdp (`bool`, `str` or list of [`~trainer_utils.FSDPOption`], *optional*, defaults to `False`): + fsdp (`bool`, `str` or list of [`~trainer_utils.FSDPOption`], *optional*, defaults to `''`): Use PyTorch Distributed Parallel Training (in distributed training only). A list of options along the following: @@ -969,7 +969,7 @@ class TrainingArguments: ) }, ) - sharded_ddp: str = field( + sharded_ddp: Optional[Union[List[ShardedDDPOption], str]] = field( default="", metadata={ "help": ( @@ -980,7 +980,7 @@ class TrainingArguments: ), }, ) - fsdp: str = field( + fsdp: Optional[Union[List[FSDPOption], str]] = field( default="", metadata={ "help": ( @@ -1005,8 +1005,8 @@ class TrainingArguments: default=None, metadata={ "help": ( - "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a" - "fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`." + "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a" + "fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`." ) }, ) @@ -1019,11 +1019,11 @@ class TrainingArguments: ) }, ) - deepspeed: Optional[str] = field( + deepspeed: Optional[Union[str, Dict]] = field( default=None, metadata={ "help": ( - "Enable deepspeed and pass the path to deepspeed json config file (e.g. ds_config.json) or an already" + "Enable deepspeed and pass the path to deepspeed json config file (e.g. `ds_config.json`) or an already" " loaded json file as a dict" ) },