Added CamembertForQuestionAnswering (#2746)

* Added CamembertForQuestionAnswering

* fixed camembert tokenizer case
This commit is contained in:
maximeilluin
2020-02-21 18:01:02 +01:00
committed by GitHub
parent 5211d333bb
commit c749a543fa
4 changed files with 30 additions and 7 deletions

View File

@@ -38,6 +38,9 @@ from transformers import (
BertConfig,
BertForQuestionAnswering,
BertTokenizer,
CamembertConfig,
CamembertForQuestionAnswering,
CamembertTokenizer,
DistilBertConfig,
DistilBertForQuestionAnswering,
DistilBertTokenizer,
@@ -70,12 +73,16 @@ except ImportError:
logger = logging.getLogger(__name__)
ALL_MODELS = sum(
(tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, RobertaConfig, XLNetConfig, XLMConfig)),
(
tuple(conf.pretrained_config_archive_map.keys())
for conf in (BertConfig, CamembertConfig, RobertaConfig, XLNetConfig, XLMConfig)
),
(),
)
MODEL_CLASSES = {
"bert": (BertConfig, BertForQuestionAnswering, BertTokenizer),
"camembert": (CamembertConfig, CamembertForQuestionAnswering, CamembertTokenizer),
"roberta": (RobertaConfig, RobertaForQuestionAnswering, RobertaTokenizer),
"xlnet": (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
"xlm": (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
@@ -212,7 +219,7 @@ def train(args, train_dataset, model, tokenizer):
"end_positions": batch[4],
}
if args.model_type in ["xlm", "roberta", "distilbert"]:
if args.model_type in ["xlm", "roberta", "distilbert", "camembert"]:
del inputs["token_type_ids"]
if args.model_type in ["xlnet", "xlm"]:
@@ -327,7 +334,7 @@ def evaluate(args, model, tokenizer, prefix=""):
"token_type_ids": batch[2],
}
if args.model_type in ["xlm", "roberta", "distilbert"]:
if args.model_type in ["xlm", "roberta", "distilbert", "camembert"]:
del inputs["token_type_ids"]
example_indices = batch[3]