add sep_token between question and choice
This commit is contained in:
@@ -329,7 +329,12 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
|||||||
if example.question.find("_") != -1:
|
if example.question.find("_") != -1:
|
||||||
tokens_b = tokenizer.tokenize(example.question.replace("_", ending))
|
tokens_b = tokenizer.tokenize(example.question.replace("_", ending))
|
||||||
else:
|
else:
|
||||||
tokens_b = tokenizer.tokenize(example.question + " " + ending)
|
tokens_b = tokenizer.tokenize(example.question)
|
||||||
|
tokens_b += [sep_token]
|
||||||
|
if sep_token_extra:
|
||||||
|
tokens_b += [sep_token]
|
||||||
|
tokens_b += tokenizer.tokenize(ending)
|
||||||
|
|
||||||
special_tokens_count = 4 if sep_token_extra else 3
|
special_tokens_count = 4 if sep_token_extra else 3
|
||||||
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
|
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
|
||||||
|
|
||||||
@@ -425,10 +430,11 @@ def _truncate_seq_pair(tokens_a, tokens_b, max_length):
|
|||||||
total_length = len(tokens_a) + len(tokens_b)
|
total_length = len(tokens_a) + len(tokens_b)
|
||||||
if total_length <= max_length:
|
if total_length <= max_length:
|
||||||
break
|
break
|
||||||
if len(tokens_a) > len(tokens_b):
|
# if len(tokens_a) > len(tokens_b):
|
||||||
|
# tokens_a.pop()
|
||||||
|
# else:
|
||||||
|
# tokens_b.pop()
|
||||||
tokens_a.pop()
|
tokens_a.pop()
|
||||||
else:
|
|
||||||
tokens_b.pop()
|
|
||||||
|
|
||||||
|
|
||||||
processors = {
|
processors = {
|
||||||
|
|||||||
Reference in New Issue
Block a user