Merge branch 'master' of https://github.com/huggingface/pytorch-pretrained-BERT
This commit is contained in:
@@ -427,7 +427,10 @@ def main():
|
|||||||
type=int,
|
type=int,
|
||||||
default=-1,
|
default=-1,
|
||||||
help="local_rank for distributed training on gpus")
|
help="local_rank for distributed training on gpus")
|
||||||
|
parser.add_argument('--seed',
|
||||||
|
type=int,
|
||||||
|
default=42,
|
||||||
|
help="random seed for initialization")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
processors = {
|
processors = {
|
||||||
@@ -445,6 +448,11 @@ def main():
|
|||||||
# print("Initializing the distributed backend: NCCL")
|
# print("Initializing the distributed backend: NCCL")
|
||||||
print("device", device, "n_gpu", n_gpu)
|
print("device", device, "n_gpu", n_gpu)
|
||||||
|
|
||||||
|
random.seed(args.seed)
|
||||||
|
np.random.seed(args.seed)
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
|
||||||
|
|
||||||
if not args.do_train and not args.do_eval:
|
if not args.do_train and not args.do_eval:
|
||||||
raise ValueError("At least one of `do_train` or `do_eval` must be True.")
|
raise ValueError("At least one of `do_train` or `do_eval` must be True.")
|
||||||
|
|
||||||
@@ -529,10 +537,10 @@ def main():
|
|||||||
label_ids = label_ids.to(device)
|
label_ids = label_ids.to(device)
|
||||||
|
|
||||||
loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
|
loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
|
||||||
total_tr_loss += loss.item()
|
total_tr_loss += loss.sum().item() # sum() is to account for multi-gpu support.
|
||||||
nb_tr_examples += input_ids.size(0)
|
nb_tr_examples += input_ids.size(0)
|
||||||
model.zero_grad()
|
model.zero_grad()
|
||||||
loss.backward()
|
loss.sum().backward() # sum() is to account for multi-gpu support.
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
global_step += 1
|
global_step += 1
|
||||||
|
|
||||||
@@ -573,7 +581,7 @@ def main():
|
|||||||
label_ids = label_ids.to('cpu').numpy()
|
label_ids = label_ids.to('cpu').numpy()
|
||||||
tmp_eval_accuracy = accuracy(logits, label_ids)
|
tmp_eval_accuracy = accuracy(logits, label_ids)
|
||||||
|
|
||||||
eval_loss += tmp_eval_loss.item()
|
eval_loss += tmp_eval_loss.sum().item()
|
||||||
eval_accuracy += tmp_eval_accuracy
|
eval_accuracy += tmp_eval_accuracy
|
||||||
|
|
||||||
nb_eval_examples += input_ids.size(0)
|
nb_eval_examples += input_ids.size(0)
|
||||||
|
|||||||
@@ -745,6 +745,10 @@ def main():
|
|||||||
type=int,
|
type=int,
|
||||||
default=-1,
|
default=-1,
|
||||||
help="local_rank for distributed training on gpus")
|
help="local_rank for distributed training on gpus")
|
||||||
|
parser.add_argument('--seed',
|
||||||
|
type=int,
|
||||||
|
default=42,
|
||||||
|
help="random seed for initialization")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -757,6 +761,11 @@ def main():
|
|||||||
# print("Initializing the distributed backend: NCCL")
|
# print("Initializing the distributed backend: NCCL")
|
||||||
print("device", device, "n_gpu", n_gpu)
|
print("device", device, "n_gpu", n_gpu)
|
||||||
|
|
||||||
|
random.seed(args.seed)
|
||||||
|
np.random.seed(args.seed)
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
if n_gpu>0: torch.cuda.manual_seed_all(args.seed)
|
||||||
|
|
||||||
if not args.do_train and not args.do_predict:
|
if not args.do_train and not args.do_predict:
|
||||||
raise ValueError("At least one of `do_train` or `do_predict` must be True.")
|
raise ValueError("At least one of `do_train` or `do_predict` must be True.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user