Fixed embarrassing indentation problem
This commit is contained in:
@@ -241,8 +241,7 @@ def main():
|
|||||||
from apex.optimizers import FP16_Optimizer
|
from apex.optimizers import FP16_Optimizer
|
||||||
from apex.optimizers import FusedAdam
|
from apex.optimizers import FusedAdam
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(
|
raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
|
||||||
"Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
|
|
||||||
|
|
||||||
optimizer = FusedAdam(optimizer_grouped_parameters,
|
optimizer = FusedAdam(optimizer_grouped_parameters,
|
||||||
lr=args.learning_rate,
|
lr=args.learning_rate,
|
||||||
@@ -259,57 +258,57 @@ def main():
|
|||||||
warmup=args.warmup_proportion,
|
warmup=args.warmup_proportion,
|
||||||
t_total=num_train_optimization_steps)
|
t_total=num_train_optimization_steps)
|
||||||
|
|
||||||
global_step = 0
|
global_step = 0
|
||||||
logging.info("***** Running training *****")
|
logging.info("***** Running training *****")
|
||||||
logging.info(f" Num examples = {total_train_examples}")
|
logging.info(f" Num examples = {total_train_examples}")
|
||||||
logging.info(" Batch size = %d", args.train_batch_size)
|
logging.info(" Batch size = %d", args.train_batch_size)
|
||||||
logging.info(" Num steps = %d", num_train_optimization_steps)
|
logging.info(" Num steps = %d", num_train_optimization_steps)
|
||||||
model.train()
|
model.train()
|
||||||
for epoch in range(args.epochs):
|
for epoch in range(args.epochs):
|
||||||
epoch_dataset = PregeneratedDataset(epoch=epoch, training_path=args.pregenerated_data, tokenizer=tokenizer,
|
epoch_dataset = PregeneratedDataset(epoch=epoch, training_path=args.pregenerated_data, tokenizer=tokenizer,
|
||||||
num_data_epochs=num_data_epochs)
|
num_data_epochs=num_data_epochs)
|
||||||
if args.local_rank == -1:
|
if args.local_rank == -1:
|
||||||
train_sampler = RandomSampler(epoch_dataset)
|
train_sampler = RandomSampler(epoch_dataset)
|
||||||
else:
|
else:
|
||||||
train_sampler = DistributedSampler(epoch_dataset)
|
train_sampler = DistributedSampler(epoch_dataset)
|
||||||
train_dataloader = DataLoader(epoch_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
|
train_dataloader = DataLoader(epoch_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
|
||||||
tr_loss = 0
|
tr_loss = 0
|
||||||
nb_tr_examples, nb_tr_steps = 0, 0
|
nb_tr_examples, nb_tr_steps = 0, 0
|
||||||
with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch}") as pbar:
|
with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch}") as pbar:
|
||||||
for step, batch in enumerate(train_dataloader):
|
for step, batch in enumerate(train_dataloader):
|
||||||
batch = tuple(t.to(device) for t in batch)
|
batch = tuple(t.to(device) for t in batch)
|
||||||
input_ids, input_mask, segment_ids, lm_label_ids, is_next = batch
|
input_ids, input_mask, segment_ids, lm_label_ids, is_next = batch
|
||||||
loss = model(input_ids, segment_ids, input_mask, lm_label_ids, is_next)
|
loss = model(input_ids, segment_ids, input_mask, lm_label_ids, is_next)
|
||||||
if n_gpu > 1:
|
if n_gpu > 1:
|
||||||
loss = loss.mean() # mean() to average on multi-gpu.
|
loss = loss.mean() # mean() to average on multi-gpu.
|
||||||
if args.gradient_accumulation_steps > 1:
|
if args.gradient_accumulation_steps > 1:
|
||||||
loss = loss / args.gradient_accumulation_steps
|
loss = loss / args.gradient_accumulation_steps
|
||||||
|
if args.fp16:
|
||||||
|
optimizer.backward(loss)
|
||||||
|
else:
|
||||||
|
loss.backward()
|
||||||
|
tr_loss += loss.item()
|
||||||
|
nb_tr_examples += input_ids.size(0)
|
||||||
|
nb_tr_steps += 1
|
||||||
|
pbar.update(1)
|
||||||
|
mean_loss = tr_loss / nb_tr_steps
|
||||||
|
pbar.set_postfix_str(f"Loss: {mean_loss:.5f}")
|
||||||
|
if (step + 1) % args.gradient_accumulation_steps == 0:
|
||||||
if args.fp16:
|
if args.fp16:
|
||||||
optimizer.backward(loss)
|
# modify learning rate with special warm up BERT uses
|
||||||
else:
|
# if args.fp16 is False, BertAdam is used that handles this automatically
|
||||||
loss.backward()
|
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
||||||
tr_loss += loss.item()
|
for param_group in optimizer.param_groups:
|
||||||
nb_tr_examples += input_ids.size(0)
|
param_group['lr'] = lr_this_step
|
||||||
nb_tr_steps += 1
|
optimizer.step()
|
||||||
pbar.update(1)
|
optimizer.zero_grad()
|
||||||
mean_loss = tr_loss / nb_tr_steps
|
global_step += 1
|
||||||
pbar.set_postfix_str(f"Loss: {mean_loss:.5f}")
|
|
||||||
if (step + 1) % args.gradient_accumulation_steps == 0:
|
|
||||||
if args.fp16:
|
|
||||||
# modify learning rate with special warm up BERT uses
|
|
||||||
# if args.fp16 is False, BertAdam is used that handles this automatically
|
|
||||||
lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
|
|
||||||
for param_group in optimizer.param_groups:
|
|
||||||
param_group['lr'] = lr_this_step
|
|
||||||
optimizer.step()
|
|
||||||
optimizer.zero_grad()
|
|
||||||
global_step += 1
|
|
||||||
|
|
||||||
# Save a trained model
|
# Save a trained model
|
||||||
logging.info("** ** * Saving fine-tuned model ** ** * ")
|
logging.info("** ** * Saving fine-tuned model ** ** * ")
|
||||||
model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self
|
model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self
|
||||||
output_model_file = args.output_dir / "pytorch_model.bin"
|
output_model_file = args.output_dir / "pytorch_model.bin"
|
||||||
torch.save(model_to_save.state_dict(), str(output_model_file))
|
torch.save(model_to_save.state_dict(), str(output_model_file))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user