Updated the GLUE data processor. Corrections to RoBERTa and XLNet.

This commit is contained in:
LysandreJik
2019-09-02 21:46:11 -04:00
parent c3df2136e1
commit bac332fec0
3 changed files with 4 additions and 56 deletions

View File

@@ -98,7 +98,7 @@ class RobertaTokenizer(GPT2Tokenizer):
if output_mask:
return (
cls + token_ids_0 + sep + sep + token_ids_1 + sep,
-[0] * len(cls + token_ids_0 + sep) + [1] * len(sep + token_ids_1 + sep)
+[0] * len(cls + token_ids_0 + sep + sep) + [1] * len(token_ids_1 + sep)
)
else:
return cls + token_ids_0 + sep + sep + token_ids_1 + sep

View File

@@ -198,10 +198,11 @@ class XLNetTokenizer(PreTrainedTokenizer):
sep = [self.sep_token_id]
cls = [self.cls_token_id]
+cls_segment_ids = [2]
if output_mask:
return (
token_ids_0 + sep + token_ids_1 + sep + cls,
-[0] * len(token_ids_0 + sep) + [1] * len(token_ids_1 + sep + cls)
+[0] * len(token_ids_0 + sep) + [1] * len(token_ids_1 + sep) + cls_segment_ids
)
else:
return token_ids_0 + sep + token_ids_1 + sep + cls