[deepspeed] offload + non-cpuadam optimizer exception doc (#22044)
* [deepspeed] offload + non-cpuadam optimizer exception doc * deps
This commit is contained in:
@@ -1293,8 +1293,17 @@ If you want to use another optimizer which is not listed above, you will have to
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Similarly to `AdamW`, you can configure other officially supported optimizers. Just remember that may have different
|
Similarly to `AdamW`, you can configure other officially supported optimizers. Just remember that those may have different config values, e.g. for Adam you will want `weight_decay` around `0.01`.
|
||||||
config values. e.g. for Adam you will want `weight_decay` around `0.01`.
|
|
||||||
|
Additionally, offload works best when it is used with DeepSpeed's CPU Adam optimizer. If you want to use a different optimizer with offload, starting with `deepspeed==0.8.3` you also need to add:
|
||||||
|
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"zero_force_ds_cpu_optimizer": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
to the top-level configuration.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -106,7 +106,7 @@ _deps = [
|
|||||||
"dataclasses",
|
"dataclasses",
|
||||||
"datasets!=2.5.0",
|
"datasets!=2.5.0",
|
||||||
"decord==0.6.0",
|
"decord==0.6.0",
|
||||||
"deepspeed>=0.6.5",
|
"deepspeed>=0.8.3",
|
||||||
"dill<0.3.5",
|
"dill<0.3.5",
|
||||||
"evaluate>=0.2.0",
|
"evaluate>=0.2.0",
|
||||||
"fairscale>0.3",
|
"fairscale>0.3",
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ deps = {
|
|||||||
"dataclasses": "dataclasses",
|
"dataclasses": "dataclasses",
|
||||||
"datasets": "datasets!=2.5.0",
|
"datasets": "datasets!=2.5.0",
|
||||||
"decord": "decord==0.6.0",
|
"decord": "decord==0.6.0",
|
||||||
"deepspeed": "deepspeed>=0.6.5",
|
"deepspeed": "deepspeed>=0.8.3",
|
||||||
"dill": "dill<0.3.5",
|
"dill": "dill<0.3.5",
|
||||||
"evaluate": "evaluate>=0.2.0",
|
"evaluate": "evaluate>=0.2.0",
|
||||||
"fairscale": "fairscale>0.3",
|
"fairscale": "fairscale>0.3",
|
||||||
|
|||||||
Reference in New Issue
Block a user