update tokenizer - update squad example for xlnet

This commit is contained in:
thomwolf
2019-07-15 17:30:42 +02:00
parent 3b469cb422
commit 15d8b1266c
20 changed files with 191 additions and 131 deletions

View File

@@ -185,9 +185,9 @@ class GPT2Tokenizer(PreTrainedTokenizer):
"""Converts an index (integer) in a token (string/unicode) using the vocab."""
return self.decoder.get(index)
def _convert_ids_to_string(self, tokens_ids):
"""Converts a sequence of ids in a string."""
text = ''.join(tokens_ids)
def convert_tokens_to_string(self, tokens):
""" Converts a sequence of tokens (string) in a single string. """
text = ''.join(tokens)
text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors)
return text