Override get_vocab for fast tokenizer. (#4717)

This commit is contained in:
Funtowicz Morgan
2020-06-02 09:02:27 +00:00
committed by GitHub
parent 88762a2f8c
commit f6d5046af1

View File

@@ -2368,6 +2368,9 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
def _convert_id_to_token(self, index: int) -> Optional[str]:
    """Look up the token for a single vocabulary id through the backend tokenizer.

    Args:
        index: Vocabulary id to resolve. It is passed through ``int()``
            before the lookup, so any ``int()``-convertible value is accepted.

    Returns:
        Whatever ``self._tokenizer.id_to_token`` returns for that id
        (annotated as an optional string).
    """
    # Normalise to a plain Python int before handing the id to the backend.
    token_id = int(index)
    return self._tokenizer.id_to_token(token_id)
def get_vocab(self):
    """Return the vocabulary held by the backend (fast) tokenizer.

    Delegates to ``self._tokenizer.get_vocab`` and returns its result
    unchanged.
    """
    # The positional ``True`` is the backend's ``with_added_tokens`` flag per
    # the `tokenizers` API, i.e. tokens added after training are included.
    # NOTE(review): confirm against the pinned `tokenizers` version.
    return self._tokenizer.get_vocab(True)
def convert_tokens_to_string(self, tokens: List[int], skip_special_tokens: bool = False) -> str:
    """Decode a sequence into a single string using the backend tokenizer.

    Args:
        tokens: Sequence forwarded as-is to ``self._tokenizer.decode``.
            NOTE(review): despite the parameter name, the annotation is
            ``List[int]``, so this presumably expects token ids rather
            than token strings; confirm against callers.
        skip_special_tokens: Forwarded positionally as the second argument
            of the backend ``decode`` call. Defaults to ``False``.

    Returns:
        The value produced by ``self._tokenizer.decode`` (annotated ``str``).
    """
    backend = self._tokenizer
    return backend.decode(tokens, skip_special_tokens)