[Examples] create model with custom config on the fly (#11798)

* create custom model on the flight

* better wording

* add update_from_string

* cleanup

* cleanup

* Update src/transformers/configuration_utils.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* more bool options

* style

* fix logger

* add test

* add the doc

* assert on conflict of options

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
This commit is contained in:
Stas Bekman
2021-05-25 10:40:49 -07:00
committed by GitHub
parent 6287c929c1
commit 1b6530104d
4 changed files with 94 additions and 3 deletions

View File

@@ -161,3 +161,21 @@ concatenates all texts and then splits them in blocks of the same length).
**Note:** On TPU, you should use the flag `--pad_to_max_length` in conjunction with the `--line_by_line` flag to make
sure all your batches have the same length.
## Creating a model on the fly
When training a model from scratch, configuration values may be overridden with the help of `--config_overrides`:
```bash
python run_clm.py --model_type gpt2 --tokenizer_name gpt2 \ --config_overrides="n_embd=1024,n_head=16,n_layer=48,n_positions=102" \
[...]
```
At the moment this is only available in `run_clm.py` but eventually should be copied to all other LM examples.
This feature can also be used to activate gradient checkpointing by passing:
```
--config_overrides "gradient_checkpointing=true,use_cache=False"
```

View File

@@ -75,6 +75,13 @@ class ModelArguments:
default=None,
metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
)
config_overrides: Optional[str] = field(
default=None,
metadata={
"help": "Override some existing default config settings when a model is trained from scratch. Example: "
"n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
},
)
config_name: Optional[str] = field(
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
)
@@ -101,6 +108,12 @@ class ModelArguments:
},
)
def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
raise ValueError(
"--config_overrides can't be used in combination with --config_name or --model_name_or_path"
)
@dataclass
class DataTrainingArguments:
@@ -279,6 +292,9 @@ def main():
else:
config = CONFIG_MAPPING[model_args.model_type]()
logger.warning("You are instantiating a new config instance from scratch.")
if model_args.config_overrides is not None:
logger.info(f"Overriding config: {model_args.config_overrides}")
config.update_from_string(model_args.config_overrides)
tokenizer_kwargs = {
"cache_dir": model_args.cache_dir,
@@ -306,8 +322,9 @@ def main():
use_auth_token=True if model_args.use_auth_token else None,
)
else:
logger.info("Training new model from scratch")
model = AutoModelForCausalLM.from_config(config)
n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values())
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
model.resize_token_embeddings(len(tokenizer))