Merge branch 'master' into saving-and-resuming

This commit is contained in:
Thomas Wolf
2019-12-21 14:29:59 +01:00
committed by GitHub
135 changed files with 9041 additions and 1529 deletions

View File

@@ -52,6 +52,9 @@ from transformers import (WEIGHTS_NAME, BertConfig,
AlbertConfig,
AlbertForSequenceClassification,
AlbertTokenizer,
XLMRobertaConfig,
XLMRobertaForSequenceClassification,
XLMRobertaTokenizer,
)
from transformers import AdamW, get_linear_schedule_with_warmup
@@ -72,7 +75,8 @@ MODEL_CLASSES = {
'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
'roberta': (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
'distilbert': (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer),
'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer)
'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer),
'xlmroberta': (XLMRobertaConfig, XLMRobertaForSequenceClassification, XLMRobertaTokenizer),
}
@@ -337,9 +341,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
else:
logger.info("Creating features from dataset file at %s", args.data_dir)
label_list = processor.get_labels()
if task in ['mnli', 'mnli-mm'] and args.model_type in ['roberta']:
if task in ['mnli', 'mnli-mm'] and args.model_type in ['roberta', 'xlmroberta']:
# HACK(label indices are swapped in RoBERTa pretrained model)
label_list[1], label_list[2] = label_list[2], label_list[1]
label_list[1], label_list[2] = label_list[2], label_list[1]
examples = processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir)
features = convert_examples_to_features(examples,
tokenizer,
@@ -413,7 +417,7 @@ def main():
parser.add_argument("--learning_rate", default=5e-5, type=float,
help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay", default=0.0, type=float,
help="Weight deay if we apply some.")
help="Weight decay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float,