Updated the GLUE data processor. Corrections to RoBERTa and XLNet.
This commit is contained in:
@@ -98,7 +98,7 @@ class RobertaTokenizer(GPT2Tokenizer):
|
||||
if output_mask:
|
||||
return (
|
||||
cls + token_ids_0 + sep + sep + token_ids_1 + sep,
|
||||
- [0] * len(cls + token_ids_0 + sep) + [1] * len(sep + token_ids_1 + sep)
|
||||
+ [0] * len(cls + token_ids_0 + sep + sep) + [1] * len(token_ids_1 + sep)
|
||||
)
|
||||
else:
|
||||
return cls + token_ids_0 + sep + sep + token_ids_1 + sep
|
||||
|
||||
@@ -198,10 +198,11 @@ class XLNetTokenizer(PreTrainedTokenizer):
|
||||
|
||||
sep = [self.sep_token_id]
|
||||
cls = [self.cls_token_id]
|
||||
+ cls_segment_ids = [2]
|
||||
if output_mask:
|
||||
return (
|
||||
token_ids_0 + sep + token_ids_1 + sep + cls,
|
||||
- [0] * len(token_ids_0 + sep) + [1] * len(token_ids_1 + sep + cls)
|
||||
+ [0] * len(token_ids_0 + sep) + [1] * len(token_ids_1 + sep) + cls_segment_ids
|
||||
)
|
||||
else:
|
||||
return token_ids_0 + sep + token_ids_1 + sep + cls
|
||||
|
||||
Reference in New Issue
Block a user