Add get_vocab method to PreTrainedTokenizer

This commit is contained in:
Joe Davison
2020-02-20 15:25:46 -05:00
parent ea8eba35e2
commit 197d74f988
12 changed files with 62 additions and 0 deletions

View File

@@ -147,6 +147,9 @@ class CTRLTokenizer(PreTrainedTokenizer):
def vocab_size(self):
    """Return the number of entries in ``self.encoder``.

    Only ``self.encoder`` is measured; no other attribute of the instance
    is consulted.
    """
    base_vocab = self.encoder
    return len(base_vocab)
def get_vocab(self):
    """Return a new dict combining the base and added-token vocabularies.

    The result is built from ``self.encoder`` and then overlaid with
    ``self.added_tokens_encoder``; when both contain the same key, the
    value from ``self.added_tokens_encoder`` is the one kept. Neither
    source mapping is modified.
    """
    base_vocab = self.encoder
    extra_tokens = self.added_tokens_encoder
    # Keyword expansion means every key in the added-token mapping must be
    # a str, otherwise dict() raises TypeError.
    return dict(base_vocab, **extra_tokens)
def bpe(self, token):
if token in self.cache:
return self.cache[token]