From pretrained correct initialization. Unknown token handling for gpt2.

This commit is contained in:
LysandreJik
2019-07-11 18:44:29 -04:00
parent 50e62a4cb4
commit e3fb4310d6
3 changed files with 4 additions and 4 deletions

View File

@@ -177,11 +177,11 @@ class GPT2Tokenizer(PreTrainedTokenizer):
def _convert_token_to_id(self, token):
""" Converts a token (str/unicode) in an id using the vocab. """
# NOTE(review): the two `return` lines below follow each other with no branch
# between them and the body carries no indentation. This looks like a diff
# hunk whose +/- markers were stripped, so one line is the removed version
# and the other the added one -- confirm against the repository which is live.
#
# Variant with a fallback: looks `token` up in `self.encoder`; on a miss it
# returns whatever `self.encoder` holds for `self.unk_token` (presumably a
# dict, in which case that is None when the unknown token is absent too).
return self.encoder.get(token, self.encoder.get(self.unk_token))
# Variant without a fallback: a plain lookup of `token` in `self.encoder`
# (presumably yields None on a miss -- verify the mapping type).
return self.encoder.get(token)
def _convert_id_to_token(self, index):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
# NOTE(review): the two `return` lines below follow each other with no branch
# between them and the body carries no indentation. This looks like a diff
# hunk whose +/- markers were stripped, so one line is the removed version
# and the other the added one -- confirm against the repository which is live.
#
# Variant with a fallback: looks `index` up in `self.decoder` and returns
# `self.unk_token` itself (the token, not an id) when the index is missing.
return self.decoder.get(index, self.unk_token)
# Variant without a fallback: a plain lookup of `index` in `self.decoder`
# (presumably yields None on a miss -- verify the mapping type).
return self.decoder.get(index)
def _convert_ids_to_string(self, tokens_ids):
"""Converts a sequence of ids in a string."""