add low_cpu_mem_usage option in run_clm.py example which will benefit… (#22288)
* add low_cpu_mem_usage option in run_clm.py example which will benefit LLM loading Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> * update all the example and README under language-modeling Signed-off-by: Wang, Yi A <yi.a.wang@intel.com> --------- Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
@@ -178,6 +178,10 @@ sure all your batches have the same length.
|
|||||||
|
|
||||||
To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` to the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
|
To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` to the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
|
||||||
|
|
||||||
|
## Low CPU Memory Usage
|
||||||
|
|
||||||
|
To use low CPU memory mode, which can be very useful for LLMs, add `--low_cpu_mem_usage` to the command line. This is currently supported by `run_clm.py`, `run_mlm.py`, `run_plm.py`, `run_mlm_no_trainer.py` and `run_clm_no_trainer.py`.
|
||||||
|
|
||||||
## Creating a model on the fly
|
## Creating a model on the fly
|
||||||
|
|
||||||
When training a model from scratch, configuration values may be overridden with the help of `--config_overrides`:
|
When training a model from scratch, configuration values may be overridden with the help of `--config_overrides`:
|
||||||
|
|||||||
@@ -130,6 +130,15 @@ class ModelArguments:
|
|||||||
"choices": ["auto", "bfloat16", "float16", "float32"],
|
"choices": ["auto", "bfloat16", "float16", "float32"],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
low_cpu_mem_usage: bool = field(
|
||||||
|
default=False,
|
||||||
|
metadata={
|
||||||
|
"help": (
|
||||||
|
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||||
|
"set True will benefit LLM loading time and RAM consumption."
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
||||||
@@ -408,6 +417,7 @@ def main():
|
|||||||
revision=model_args.model_revision,
|
revision=model_args.model_revision,
|
||||||
use_auth_token=True if model_args.use_auth_token else None,
|
use_auth_token=True if model_args.use_auth_token else None,
|
||||||
torch_dtype=torch_dtype,
|
torch_dtype=torch_dtype,
|
||||||
|
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
model = AutoModelForCausalLM.from_config(config)
|
model = AutoModelForCausalLM.from_config(config)
|
||||||
|
|||||||
@@ -220,6 +220,14 @@ def parse_args():
|
|||||||
"Only applicable when `--with_tracking` is passed."
|
"Only applicable when `--with_tracking` is passed."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--low_cpu_mem_usage",
|
||||||
|
action="store_true",
|
||||||
|
help=(
|
||||||
|
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||||
|
"If passed, LLM loading time and RAM consumption will be benefited."
|
||||||
|
),
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Sanity checks
|
# Sanity checks
|
||||||
@@ -374,6 +382,7 @@ def main():
|
|||||||
args.model_name_or_path,
|
args.model_name_or_path,
|
||||||
from_tf=bool(".ckpt" in args.model_name_or_path),
|
from_tf=bool(".ckpt" in args.model_name_or_path),
|
||||||
config=config,
|
config=config,
|
||||||
|
low_cpu_mem_usage=args.low_cpu_mem_usage,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info("Training new model from scratch")
|
logger.info("Training new model from scratch")
|
||||||
|
|||||||
@@ -116,6 +116,15 @@ class ModelArguments:
|
|||||||
)
|
)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
low_cpu_mem_usage: bool = field(
|
||||||
|
default=False,
|
||||||
|
metadata={
|
||||||
|
"help": (
|
||||||
|
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||||
|
"set True will benefit LLM loading time and RAM consumption."
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
||||||
@@ -395,6 +404,7 @@ def main():
|
|||||||
cache_dir=model_args.cache_dir,
|
cache_dir=model_args.cache_dir,
|
||||||
revision=model_args.model_revision,
|
revision=model_args.model_revision,
|
||||||
use_auth_token=True if model_args.use_auth_token else None,
|
use_auth_token=True if model_args.use_auth_token else None,
|
||||||
|
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info("Training new model from scratch")
|
logger.info("Training new model from scratch")
|
||||||
|
|||||||
@@ -227,6 +227,14 @@ def parse_args():
|
|||||||
"Only applicable when `--with_tracking` is passed."
|
"Only applicable when `--with_tracking` is passed."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--low_cpu_mem_usage",
|
||||||
|
action="store_true",
|
||||||
|
help=(
|
||||||
|
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||||
|
"If passed, LLM loading time and RAM consumption will be benefited."
|
||||||
|
),
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Sanity checks
|
# Sanity checks
|
||||||
@@ -379,6 +387,7 @@ def main():
|
|||||||
args.model_name_or_path,
|
args.model_name_or_path,
|
||||||
from_tf=bool(".ckpt" in args.model_name_or_path),
|
from_tf=bool(".ckpt" in args.model_name_or_path),
|
||||||
config=config,
|
config=config,
|
||||||
|
low_cpu_mem_usage=args.low_cpu_mem_usage,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info("Training new model from scratch")
|
logger.info("Training new model from scratch")
|
||||||
|
|||||||
@@ -104,6 +104,15 @@ class ModelArguments:
|
|||||||
)
|
)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
low_cpu_mem_usage: bool = field(
|
||||||
|
default=False,
|
||||||
|
metadata={
|
||||||
|
"help": (
|
||||||
|
"It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
|
||||||
|
"set True will benefit LLM loading time and RAM consumption."
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
|
||||||
@@ -375,6 +384,7 @@ def main():
|
|||||||
cache_dir=model_args.cache_dir,
|
cache_dir=model_args.cache_dir,
|
||||||
revision=model_args.model_revision,
|
revision=model_args.model_revision,
|
||||||
use_auth_token=True if model_args.use_auth_token else None,
|
use_auth_token=True if model_args.use_auth_token else None,
|
||||||
|
low_cpu_mem_usage=model_args.low_cpu_mem_usage,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.info("Training new model from scratch")
|
logger.info("Training new model from scratch")
|
||||||
|
|||||||
Reference in New Issue
Block a user