Merge branch 'master' into saving-and-resuming

2019-12-21 14:41:39 +01:00
parent eeb70cdd77 18601c3b6e
commit 518ba748e0
7 changed files with 316 additions and 208 deletions
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -39,25 +39,27 @@ except:
 from tqdm import tqdm, trange

 from transformers import (WEIGHTS_NAME, BertConfig,
-                          BertForQuestionAnswering, BertTokenizer,
-                          XLMConfig, XLMForQuestionAnswering,
-                          XLMTokenizer, XLNetConfig,
-                          XLNetForQuestionAnswering,
-                          XLNetTokenizer,
-                          DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer,
-                          AlbertConfig, AlbertForQuestionAnswering, AlbertTokenizer,
-                          XLMConfig, XLMForQuestionAnswering, XLMTokenizer,
-                          )
+                                  BertForQuestionAnswering, BertTokenizer,
+                                  RobertaForQuestionAnswering, RobertaTokenizer, RobertaConfig,
+                                  XLMConfig, XLMForQuestionAnswering,
+                                  XLMTokenizer, XLNetConfig,
+                                  XLNetForQuestionAnswering,
+                                  XLNetTokenizer,
+                                  DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer,
+                                  AlbertConfig, AlbertForQuestionAnswering, AlbertTokenizer,
+                                  XLMConfig, XLMForQuestionAnswering, XLMTokenizer,
+                                  )

 from transformers import AdamW, get_linear_schedule_with_warmup, squad_convert_examples_to_features

 logger = logging.getLogger(__name__)

-ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys())
-                  for conf in (BertConfig, XLNetConfig, XLMConfig)), ())
+ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \
+                  for conf in (BertConfig, RobertaConfig, XLNetConfig, XLMConfig)), ())

 MODEL_CLASSES = {
    'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer),
+    'roberta': (RobertaConfig, RobertaForQuestionAnswering, RobertaTokenizer),
    'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
    'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
    'distilbert': (DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer),
@@ -191,13 +193,11 @@ def train(args, train_dataset, model, tokenizer):
            inputs = {
                'input_ids':       batch[0],
                'attention_mask':  batch[1],
+                'token_type_ids': None if args.model_type in ['xlm', 'roberta', 'distilbert'] else batch[2],
                'start_positions': batch[3],
-                'end_positions':   batch[4]
+                'end_positions':   batch[4],
            }

-            if args.model_type != 'distilbert':
-                inputs['token_type_ids'] = None if args.model_type == 'xlm' else batch[2]
-
            if args.model_type in ['xlnet', 'xlm']:
                inputs.update({'cls_index': batch[5],
                               'p_mask':       batch[6]})
@@ -315,13 +315,9 @@ def evaluate(args, model, tokenizer, prefix=""):
        with torch.no_grad():
            inputs = {
                'input_ids':      batch[0],
-                'attention_mask': batch[1]
+                'attention_mask': batch[1],
+                'token_type_ids': None if args.model_type in ['xlm', 'roberta', 'distilbert'] else batch[2],
            }
-
-            if args.model_type != 'distilbert':
-                # XLM don't use segment_ids
-                inputs['token_type_ids'] = None if args.model_type == 'xlm' else batch[2]
-
            example_indices = batch[3]

            # XLNet and XLM use more arguments for their predictions
@@ -390,9 +386,9 @@ def evaluate(args, model, tokenizer, prefix=""):
                                                    args.version_2_with_negative, tokenizer, args.verbose_logging)
    else:
        predictions = compute_predictions_logits(examples, features, all_results, args.n_best_size,
-                                                 args.max_answer_length, args.do_lower_case, output_prediction_file,
-                                                 output_nbest_file, output_null_log_odds_file, args.verbose_logging,
-                                                 args.version_2_with_negative, args.null_score_diff_threshold)
+                        args.max_answer_length, args.do_lower_case, output_prediction_file,
+                        output_nbest_file, output_null_log_odds_file, args.verbose_logging,
+                        args.version_2_with_negative, args.null_score_diff_threshold)

    # Compute the F1 and exact scores.
    results = squad_evaluate(examples, predictions)
@@ -449,7 +445,8 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=not evaluate,
-            return_dataset='pt'
+            return_dataset='pt',
+            threads=args.threads,
        )

        if args.local_rank in [-1, 0]:
@@ -568,10 +565,10 @@ def main():
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
                             "See details at https://nvidia.github.io/apex/amp.html")
-    parser.add_argument('--server_ip', type=str, default='',
-                        help="Can be used for distant debugging.")
-    parser.add_argument('--server_port', type=str, default='',
-                        help="Can be used for distant debugging.")
+    parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
+    parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
+
+    parser.add_argument('--threads', type=int, default=1, help='multiple threads for converting example to features')
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir: