From 51690699976ee47bfce0765521272c78261cdbda Mon Sep 17 00:00:00 2001 From: Matej Svejda Date: Wed, 30 Jan 2019 11:47:25 +0100 Subject: [PATCH] make examples consistent, revert error in num_train_steps calculation --- examples/run_classifier.py | 6 +++--- examples/run_lm_finetuning.py | 14 +++++++++----- examples/run_squad.py | 6 +++--- examples/run_squad2.py | 6 +++--- examples/run_swag.py | 6 +++--- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/examples/run_classifier.py b/examples/run_classifier.py index 0b60eb66ed..52205552ca 100644 --- a/examples/run_classifier.py +++ b/examples/run_classifier.py @@ -411,7 +411,7 @@ def main(): raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) - args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps) + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) @@ -441,8 +441,8 @@ def main(): num_train_steps = None if args.do_train: train_examples = processor.get_train_examples(args.data_dir) - num_train_steps = int( - len(train_examples) / args.train_batch_size * args.num_train_epochs) + num_train_steps = + len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs # Prepare model model = BertForSequenceClassification.from_pretrained(args.bert_model, diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py index 2e26842c14..c9c71ad5a1 100644 --- a/examples/run_lm_finetuning.py +++ b/examples/run_lm_finetuning.py @@ -497,7 +497,7 @@ def main(): raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) - args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps) + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) @@ -520,8 +520,8 @@ def main(): print("Loading Train Dataset", args.train_file) train_dataset = BERTDataset(args.train_file, tokenizer, seq_len=args.max_seq_length, corpus_lines=None, on_memory=args.on_memory) - num_train_steps = int( - len(train_dataset) / args.train_batch_size * args.num_train_epochs) + num_train_steps = + len(train_dataset) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs # Prepare model model = BertForPreTraining.from_pretrained(args.bert_model) @@ -544,6 +544,10 @@ def main(): {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} ] + + t_total = num_train_steps + if args.local_rank != -1: + t_total = t_total // torch.distributed.get_world_size() if args.fp16: try: from apex.optimizers import FP16_Optimizer @@ -564,7 +568,7 @@ def main(): optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, - t_total=num_train_steps) + t_total=t_total) global_step = 0 if args.do_train: @@ -604,7 +608,7 @@ def main(): if args.fp16: # modify learning rate with special warm up BERT uses # if args.fp16 is False, BertAdam is used that handles this automatically - lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_steps, args.warmup_proportion) + lr_this_step = args.learning_rate * warmup_linear(global_step/t_total, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() diff --git a/examples/run_squad.py b/examples/run_squad.py index 0881e82aba..421821006e 100644 --- a/examples/run_squad.py +++ b/examples/run_squad.py @@ -757,7 +757,7 @@ def main(): raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) - args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps) + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) @@ -788,8 +788,8 @@ def main(): if args.do_train: train_examples = read_squad_examples( input_file=args.train_file, is_training=True) - num_train_steps = int( - len(train_examples) / args.train_batch_size * args.num_train_epochs) + num_train_steps = + len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs # Prepare model model = BertForQuestionAnswering.from_pretrained(args.bert_model, diff --git a/examples/run_squad2.py b/examples/run_squad2.py index ad5e820db8..6adad7d8ea 100644 --- a/examples/run_squad2.py +++ b/examples/run_squad2.py @@ -850,7 +850,7 @@ def main(): raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) - args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps) + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) @@ -881,8 +881,8 @@ def main(): if args.do_train: train_examples = read_squad_examples( input_file=args.train_file, is_training=True) - num_train_steps = int( - len(train_examples) / args.train_batch_size * args.num_train_epochs) + num_train_steps = + len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs # Prepare model model = BertForQuestionAnswering.from_pretrained(args.bert_model, diff --git a/examples/run_swag.py b/examples/run_swag.py index 597b093a26..1856118ac5 100644 --- a/examples/run_swag.py +++ b/examples/run_swag.py @@ -331,7 +331,7 @@ def main(): raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) - args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps) + args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) @@ -352,8 +352,8 @@ def main(): num_train_steps = None if args.do_train: train_examples = read_swag_examples(os.path.join(args.data_dir, 'train.csv'), is_training = True) - num_train_steps = int( - len(train_examples) / args.train_batch_size * args.num_train_epochs) + num_train_steps = + len(train_examples) // args.train_batch_size // args.gradient_accumulation_steps * args.num_train_epochs # Prepare model model = BertForMultipleChoice.from_pretrained(args.bert_model,