Merge branch 'master' into improved_testing

This commit is contained in:
Thomas Wolf
2019-08-30 13:40:35 +02:00
committed by GitHub
43 changed files with 3103 additions and 56 deletions

View File

@@ -84,14 +84,14 @@ class RobertaTokenizer(GPT2Tokenizer):
def add_special_tokens_single_sentence(self, token_ids):
    """
    Adds special tokens to a sequence for sequence classification tasks.
    A RoBERTa sequence has the following format: <s> X </s>

    The leading token is ``self.cls_token_id`` and the trailing token is
    ``self.sep_token_id``; ``X`` stands for ``token_ids``.

    Args:
        token_ids: list of token ids for the sequence. It must be a
            ``list`` because it is concatenated with list literals; it is
            not modified.

    Returns:
        A new list: ``[cls_token_id] + token_ids + [sep_token_id]``.
    """
    # List concatenation builds a fresh list, so the caller's token_ids
    # is left untouched.
    return [self.cls_token_id] + token_ids + [self.sep_token_id]
def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
"""
Adds special tokens to a sequence pair for sequence classification tasks.
A RoBERTa sequence pair has the following format: [CLS] A [SEP][SEP] B [SEP]
A RoBERTa sequence pair has the following format: <s> A </s></s> B </s>
"""
sep = [self.sep_token_id]
cls = [self.cls_token_id]