From df52abe3733484bf62069e819680909e349bac72 Mon Sep 17 00:00:00 2001
From: erenup <ping.nie@pku.edu.cn>
Date: Wed, 28 Aug 2019 16:36:21 +0800
Subject: [PATCH] add sep_token between question and choice

---
 .../utils_multiple_choice.py                     | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/examples/single_model_scripts/utils_multiple_choice.py b/examples/single_model_scripts/utils_multiple_choice.py
index 34505195ed..6a9fa0e64f 100644
--- a/examples/single_model_scripts/utils_multiple_choice.py
+++ b/examples/single_model_scripts/utils_multiple_choice.py
@@ -329,7 +329,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
             if example.question.find("_") != -1:
                 tokens_b = tokenizer.tokenize(example.question.replace("_", ending))
             else:
-                tokens_b = tokenizer.tokenize(example.question + " " + ending)
+                tokens_b = tokenizer.tokenize(example.question)
+                tokens_b += [sep_token]
+                if sep_token_extra:
+                    tokens_b += [sep_token]
+                tokens_b += tokenizer.tokenize(ending)
+
             special_tokens_count = 4 if sep_token_extra else 3
             _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
 
@@ -425,10 +430,11 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
         total_length = len(tokens_a) + len(tokens_b)
         if total_length <= max_length:
             break
-        if len(tokens_a) > len(tokens_b):
-            tokens_a.pop()
-        else:
-            tokens_b.pop()
+        # if len(tokens_a) > len(tokens_b):
+        #     tokens_a.pop()
+        # else:
+        #     tokens_b.pop()
+        tokens_a.pop()
 
 
 processors = {