add sep_token between question and choice

2019-08-28 16:36:21 +08:00
parent 43c243254a
commit df52abe373
1 changed files with 11 additions and 5 deletions
--- a/examples/single_model_scripts/utils_multiple_choice.py
+++ b/examples/single_model_scripts/utils_multiple_choice.py
@@ -329,7 +329,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
            if example.question.find("_") != -1:
                tokens_b = tokenizer.tokenize(example.question.replace("_", ending))
            else:
-                tokens_b = tokenizer.tokenize(example.question + " " + ending)
+                tokens_b = tokenizer.tokenize(example.question)
                tokens_b += [sep_token]
                if sep_token_extra:
                    tokens_b += [sep_token]
                tokens_b += tokenizer.tokenize(ending)
            special_tokens_count = 4 if sep_token_extra else 3
            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
@@ -425,10 +430,11 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
-        if len(tokens_a) > len(tokens_b):
+        # if len(tokens_a) > len(tokens_b):
        #     tokens_a.pop()
        # else:
        #     tokens_b.pop()
        tokens_a.pop()
        else:
            tokens_b.pop()
 processors = {