Loads the tokenizer for each checkpoint to solve the reproducibility issue

This commit is contained in:
Rabeeh KARIMI
2019-08-14 10:58:26 +02:00
parent f63ff536ad
commit 3d47a7f8ab

View File

@@ -463,6 +463,7 @@ def main():
for checkpoint in checkpoints: for checkpoint in checkpoints:
global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else "" global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
model = model_class.from_pretrained(checkpoint) model = model_class.from_pretrained(checkpoint)
tokenizer = tokenizer_class.from_pretrained(checkpoint)
model.to(args.device) model.to(args.device)
result = evaluate(args, model, tokenizer, prefix=global_step) result = evaluate(args, model, tokenizer, prefix=global_step)
result = dict((k + '_{}'.format(global_step), v) for k, v in result.items()) result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())