diff --git a/modeling.py b/modeling.py index c467e8266e..860cb939a4 100644 --- a/modeling.py +++ b/modeling.py @@ -467,6 +467,6 @@ class BertForQuestionAnswering(nn.Module): start_loss = loss_fct(start_logits, start_positions) end_loss = loss_fct(end_logits, end_positions) total_loss = (start_loss + end_loss) / 2 - return total_loss, (start_logits, end_logits) + return total_loss else: return start_logits, end_logits diff --git a/run_classifier.py b/run_classifier.py index c19c6f9ac0..41c7459bd3 100644 --- a/run_classifier.py +++ b/run_classifier.py @@ -514,13 +514,13 @@ def main(): train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) model.train() - for epoch in trange(int(args.num_train_epochs), desc="Epoch"): + for _ in trange(int(args.num_train_epochs), desc="Epoch"): tr_loss = 0 nb_tr_examples, nb_tr_steps = 0, 0 for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, label_ids = batch - loss, _ = model(input_ids, segment_ids, input_mask, label_ids) + loss = model(input_ids, segment_ids, input_mask, label_ids) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. if args.gradient_accumulation_steps > 1: @@ -564,7 +564,8 @@ def main(): segment_ids = segment_ids.to(device) label_ids = label_ids.to(device) - tmp_eval_loss, logits = model(input_ids, segment_ids, input_mask, label_ids) + with torch.no_grad(): + tmp_eval_loss, logits = model(input_ids, segment_ids, input_mask, label_ids) logits = logits.detach().cpu().numpy() label_ids = label_ids.to('cpu').numpy() diff --git a/run_squad.py b/run_squad.py index a25893e1d9..78dff7dea5 100644 --- a/run_squad.py +++ b/run_squad.py @@ -855,11 +855,11 @@ def main(): train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) model.train() - for epoch in trange(int(args.num_train_epochs), desc="Epoch"): + for _ in trange(int(args.num_train_epochs), desc="Epoch"): for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, start_positions, end_positions = batch - loss, _ = model(input_ids, segment_ids, input_mask, start_positions, end_positions) + loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. if args.gradient_accumulation_steps > 1: