[WIP] lightning_base: support --lr_scheduler with multiple possibilities (#6232)
* support --lr_scheduler with multiple possibilities * correct the error message * add a note about supported schedulers * cleanup * cleanup2 * needs the argument default * style * add another assert in the test * implement requested changes * cleanups * fix relative import * cleanup
This commit is contained in:
@@ -20,6 +20,10 @@ from transformers import (
|
||||
AutoTokenizer,
|
||||
PretrainedConfig,
|
||||
PreTrainedTokenizer,
|
||||
)
|
||||
from transformers.optimization import (
|
||||
get_cosine_schedule_with_warmup,
|
||||
get_cosine_with_hard_restarts_schedule_with_warmup,
|
||||
get_linear_schedule_with_warmup,
|
||||
)
|
||||
|
||||
@@ -39,6 +43,19 @@ MODEL_MODES = {
|
||||
}
|
||||
|
||||
|
||||
# update this and the import above to support new schedulers from transformers.optimization
|
||||
arg_to_scheduler = {
|
||||
"linear": get_linear_schedule_with_warmup,
|
||||
"cosine": get_cosine_schedule_with_warmup,
|
||||
"cosine_w_restarts": get_cosine_with_hard_restarts_schedule_with_warmup,
|
||||
# polynomial': '', # TODO
|
||||
# '': get_constant_schedule, # not supported for now
|
||||
# '': get_constant_schedule_with_warmup, # not supported for now
|
||||
}
|
||||
arg_to_scheduler_choices = sorted(arg_to_scheduler.keys())
|
||||
arg_to_scheduler_metavar = "{" + ", ".join(arg_to_scheduler_choices) + "}"
|
||||
|
||||
|
||||
class BaseTransformer(pl.LightningModule):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -97,6 +114,14 @@ class BaseTransformer(pl.LightningModule):
|
||||
def load_hf_checkpoint(self, *args, **kwargs):
|
||||
self.model = self.model_type.from_pretrained(*args, **kwargs)
|
||||
|
||||
def get_lr_scheduler(self):
|
||||
get_schedule_func = arg_to_scheduler[self.hparams.lr_scheduler]
|
||||
scheduler = get_schedule_func(
|
||||
self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps
|
||||
)
|
||||
scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
|
||||
return scheduler
|
||||
|
||||
def configure_optimizers(self):
|
||||
"""Prepare optimizer and schedule (linear warmup and decay)"""
|
||||
model = self.model
|
||||
@@ -114,10 +139,8 @@ class BaseTransformer(pl.LightningModule):
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)
|
||||
self.opt = optimizer
|
||||
|
||||
scheduler = get_linear_schedule_with_warmup(
|
||||
self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=self.total_steps
|
||||
)
|
||||
scheduler = {"scheduler": scheduler, "interval": "step", "frequency": 1}
|
||||
scheduler = self.get_lr_scheduler()
|
||||
|
||||
return [optimizer], [scheduler]
|
||||
|
||||
def test_step(self, batch, batch_nb):
|
||||
@@ -203,6 +226,14 @@ class BaseTransformer(pl.LightningModule):
|
||||
"--attention_dropout", type=float, help="Attention dropout probability (Optional). Goes into model.config",
|
||||
)
|
||||
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
|
||||
parser.add_argument(
|
||||
"--lr_scheduler",
|
||||
default="linear",
|
||||
choices=arg_to_scheduler_choices,
|
||||
metavar=arg_to_scheduler_metavar,
|
||||
type=str,
|
||||
help="Learning rate scheduler",
|
||||
)
|
||||
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
|
||||
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
|
||||
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
|
||||
|
||||
Reference in New Issue
Block a user