Correct truncation for RoBERTa in 2-input GLUE
This commit is contained in:
committed by
Lysandre Debut
parent
7e7fc53da5
commit
d8923270e6
@@ -422,8 +422,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
||||
tokens_b = tokenizer.tokenize(example.text_b)
|
||||
# Modifies `tokens_a` and `tokens_b` in place so that the total
|
||||
# length is less than the specified length.
|
||||
# Account for [CLS], [SEP], [SEP] with "- 3"
|
||||
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
|
||||
# Account for [CLS], [SEP], [SEP] with "- 3", or with "- 4" for RoBERTa.
|
||||
special_tokens_count = 4 if sep_token_extra else 3
|
||||
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
|
||||
else:
|
||||
# Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
|
||||
special_tokens_count = 3 if sep_token_extra else 2
|
||||
|
||||
Reference in New Issue
Block a user