add low_cpu_mem_usage option in run_clm.py example which will benefit… (#22288)

* Add a low_cpu_mem_usage option to the run_clm.py example, which benefits LLM loading
  Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
* Update all the examples and the README under language-modeling
  Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
---------
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2023-03-22 18:42:39 +08:00
parent 8472a224fb
commit 4ccaf268fb
6 changed files with 52 additions and 0 deletions
--- a/examples/pytorch/language-modeling/run_clm.py
+++ b/examples/pytorch/language-modeling/run_clm.py
@@ -130,6 +130,15 @@ class ModelArguments:
            "choices": ["auto", "bfloat16", "float16", "float32"],
        },
    )
+    low_cpu_mem_usage: bool = field(
+        default=False,
+        metadata={
+            "help": (
+                "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
+                "set True will benefit LLM loading time and RAM consumption."
+            )
+        },
+    )

    def __post_init__(self):
        if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@@ -408,6 +417,7 @@ def main():
            revision=model_args.model_revision,
            use_auth_token=True if model_args.use_auth_token else None,
            torch_dtype=torch_dtype,
+            low_cpu_mem_usage=model_args.low_cpu_mem_usage,
        )
    else:
        model = AutoModelForCausalLM.from_config(config)