Supports already existing special tokens

This commit is contained in:
LysandreJik
2019-09-30 14:11:41 -04:00
parent 2f259b228e
commit cc412edd42
6 changed files with 33 additions and 5 deletions

View File

@@ -908,7 +908,7 @@ class PreTrainedTokenizer(object):
logger.warning("This tokenizer does not make use of special tokens. The two sequences have been concatenated.")
return token_ids_0 + token_ids_1
-def get_sequence_ids(self, token_ids_0, token_ids_1=None):
+def get_sequence_ids(self, token_ids_0, token_ids_1=None, special_tokens_present=False):
return [1] * ((len(token_ids_1) if token_ids_1 else 0) + len(token_ids_0))
def convert_ids_to_tokens(self, ids, skip_special_tokens=False):