add low_cpu_mem_usage option in run_clm.py example which will benefit… (#22288)

* add low_cpu_mem_usage option in run_clm.py example which will benefit LLM loading

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* update all the example and README under language-modeling

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

---------

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi
2023-03-22 18:42:39 +08:00
committed by GitHub
parent 8472a224fb
commit 4ccaf268fb
6 changed files with 52 additions and 0 deletions

View File

@@ -178,6 +178,10 @@ sure all your batches have the same length.
To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` to the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
## Low Cpu Memory Usage
To use low cpu memory mode which can be very useful for LLM, add `--low_cpu_mem_usage` to the command line. This is currently supported by `run_clm.py`,`run_mlm.py`, `run_plm.py`,`run_mlm_no_trainer.py` and `run_clm_no_trainer.py`.
## Creating a model on the fly
When training a model from scratch, configuration values may be overridden with the help of `--config_overrides`:

View File

@@ -130,6 +130,15 @@ class ModelArguments:
"choices": ["auto", "bfloat16", "float16", "float32"],
},
)
low_cpu_mem_usage: bool = field(
default=False,
metadata={
"help": (
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"set True will benefit LLM loading time and RAM consumption."
)
},
)
def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@@ -408,6 +417,7 @@ def main():
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
torch_dtype=torch_dtype,
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
)
else:
model = AutoModelForCausalLM.from_config(config)

View File

@@ -220,6 +220,14 @@ def parse_args():
"Only applicable when `--with_tracking` is passed."
),
)
parser.add_argument(
"--low_cpu_mem_usage",
action="store_true",
help=(
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"If passed, LLM loading time and RAM consumption will be benefited."
),
)
args = parser.parse_args()
# Sanity checks
@@ -374,6 +382,7 @@ def main():
args.model_name_or_path,
from_tf=bool(".ckpt" in args.model_name_or_path),
config=config,
low_cpu_mem_usage=args.low_cpu_mem_usage,
)
else:
logger.info("Training new model from scratch")

View File

@@ -116,6 +116,15 @@ class ModelArguments:
)
},
)
low_cpu_mem_usage: bool = field(
default=False,
metadata={
"help": (
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"set True will benefit LLM loading time and RAM consumption."
)
},
)
def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@@ -395,6 +404,7 @@ def main():
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
)
else:
logger.info("Training new model from scratch")

View File

@@ -227,6 +227,14 @@ def parse_args():
"Only applicable when `--with_tracking` is passed."
),
)
parser.add_argument(
"--low_cpu_mem_usage",
action="store_true",
help=(
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"If passed, LLM loading time and RAM consumption will be benefited."
),
)
args = parser.parse_args()
# Sanity checks
@@ -379,6 +387,7 @@ def main():
args.model_name_or_path,
from_tf=bool(".ckpt" in args.model_name_or_path),
config=config,
low_cpu_mem_usage=args.low_cpu_mem_usage,
)
else:
logger.info("Training new model from scratch")

View File

@@ -104,6 +104,15 @@ class ModelArguments:
)
},
)
low_cpu_mem_usage: bool = field(
default=False,
metadata={
"help": (
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded."
"set True will benefit LLM loading time and RAM consumption."
)
},
)
def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
@@ -375,6 +384,7 @@ def main():
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
)
else:
logger.info("Training new model from scratch")