From 4ccaf268fbfb3f5ca67b4e1c953fa10c05168432 Mon Sep 17 00:00:00 2001 From: "Wang, Yi" Date: Wed, 22 Mar 2023 18:42:39 +0800 Subject: [PATCH] =?UTF-8?q?add=20low=5Fcpu=5Fmem=5Fusage=20option=20in=20r?= =?UTF-8?q?un=5Fclm.py=20example=20which=20will=20benefit=E2=80=A6=20(#222?= =?UTF-8?q?88)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add low_cpu_mem_usage option in run_clm.py example which will benefit LLM loading Signed-off-by: Wang, Yi A * update all the example and README under language-modeling Signed-off-by: Wang, Yi A --------- Signed-off-by: Wang, Yi A --- examples/pytorch/language-modeling/README.md | 4 ++++ examples/pytorch/language-modeling/run_clm.py | 10 ++++++++++ .../pytorch/language-modeling/run_clm_no_trainer.py | 9 +++++++++ examples/pytorch/language-modeling/run_mlm.py | 10 ++++++++++ .../pytorch/language-modeling/run_mlm_no_trainer.py | 9 +++++++++ examples/pytorch/language-modeling/run_plm.py | 10 ++++++++++ 6 files changed, 52 insertions(+) diff --git a/examples/pytorch/language-modeling/README.md b/examples/pytorch/language-modeling/README.md index ff504b5357..3069fe9eb9 100644 --- a/examples/pytorch/language-modeling/README.md +++ b/examples/pytorch/language-modeling/README.md @@ -178,6 +178,10 @@ sure all your batches have the same length. To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` to the command line. This is currently supported by `run_mlm.py` and `run_clm.py`. +## Low Cpu Memory Usage + +To use low cpu memory mode which can be very useful for LLM, add `--low_cpu_mem_usage` to the command line. This is currently supported by `run_clm.py`,`run_mlm.py`, `run_plm.py`,`run_mlm_no_trainer.py` and `run_clm_no_trainer.py`. + ## Creating a model on the fly When training a model from scratch, configuration values may be overridden with the help of `--config_overrides`: diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 9b83c8e2ee..619697f9b4 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -130,6 +130,15 @@ class ModelArguments: "choices": ["auto", "bfloat16", "float16", "float32"], }, ) + low_cpu_mem_usage: bool = field( + default=False, + metadata={ + "help": ( + "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded." + "set True will benefit LLM loading time and RAM consumption." + ) + }, + ) def __post_init__(self): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): @@ -408,6 +417,7 @@ def main(): revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, torch_dtype=torch_dtype, + low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) else: model = AutoModelForCausalLM.from_config(config) diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index e43ff38555..a62a0267bf 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -220,6 +220,14 @@ def parse_args(): "Only applicable when `--with_tracking` is passed." ), ) + parser.add_argument( + "--low_cpu_mem_usage", + action="store_true", + help=( + "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded." + "If passed, LLM loading time and RAM consumption will be benefited." + ), + ) args = parser.parse_args() # Sanity checks @@ -374,6 +382,7 @@ def main(): args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, + low_cpu_mem_usage=args.low_cpu_mem_usage, ) else: logger.info("Training new model from scratch") diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index a8d3d8f29a..b8336fedce 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -116,6 +116,15 @@ class ModelArguments: ) }, ) + low_cpu_mem_usage: bool = field( + default=False, + metadata={ + "help": ( + "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded." + "set True will benefit LLM loading time and RAM consumption." + ) + }, + ) def __post_init__(self): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): @@ -395,6 +404,7 @@ def main(): cache_dir=model_args.cache_dir, revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, + low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) else: logger.info("Training new model from scratch") diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index 78cf313499..c67535edb2 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -227,6 +227,14 @@ def parse_args(): "Only applicable when `--with_tracking` is passed." ), ) + parser.add_argument( + "--low_cpu_mem_usage", + action="store_true", + help=( + "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded." + "If passed, LLM loading time and RAM consumption will be benefited." + ), + ) args = parser.parse_args() # Sanity checks @@ -379,6 +387,7 @@ def main(): args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, + low_cpu_mem_usage=args.low_cpu_mem_usage, ) else: logger.info("Training new model from scratch") diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 582fe5215b..06ab1f29f8 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -104,6 +104,15 @@ class ModelArguments: ) }, ) + low_cpu_mem_usage: bool = field( + default=False, + metadata={ + "help": ( + "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded." + "set True will benefit LLM loading time and RAM consumption." + ) + }, + ) def __post_init__(self): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): @@ -375,6 +384,7 @@ def main(): cache_dir=model_args.cache_dir, revision=model_args.model_revision, use_auth_token=True if model_args.use_auth_token else None, + low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) else: logger.info("Training new model from scratch")