Handle unk_token

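As we discussed, unknown tokens are now handled directly here: when the underlying tokenizer has no id for a token, `unk_token_id` is returned instead of `None`.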
cc @thomwolf
This commit is contained in:
Anthony MOI
2019-12-26 14:42:55 -05:00
parent 7ead04ce14
commit 835b76a46f

View File

@@ -1508,7 +1508,10 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
return self.tokenizer.encode(text).tokens
def _convert_token_to_id_with_added_voc(self, token):
    """Return the id for ``token``, falling back to the unknown-token id.

    The token is looked up with ``self.tokenizer.token_to_id``. When that
    lookup yields ``None``, ``self.unk_token_id`` is returned instead.

    Args:
        token: The token to convert.

    Returns:
        The value returned by ``self.tokenizer.token_to_id(token)``, or
        ``self.unk_token_id`` when that value is ``None``.
    """
    token_id = self.tokenizer.token_to_id(token)
    # Compare against None explicitly: a falsy id such as 0 must be
    # returned as-is rather than replaced by the unknown-token id.
    if token_id is None:
        return self.unk_token_id
    return token_id
def _convert_id_to_token(self, index):
    """Return the token that ``self.tokenizer.id_to_token`` gives for ``index``.

    ``index`` is converted with ``int()`` before the lookup.
    """
    lookup = self.tokenizer.id_to_token
    numeric_index = int(index)
    return lookup(numeric_index)