This commit is contained in:
LysandreJik
2019-08-14 14:56:14 -04:00
parent c4ef103447
commit 572dcfd1db
6 changed files with 327 additions and 125 deletions

View File

@@ -65,8 +65,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class RobertaTokenizer(PreTrainedTokenizer):
"""
RoBERTa BPE tokenizer, derived from the GPT-2 tokenizer. Peculiarities:
- Byte-level BPE
RoBERTa BPE tokenizer, derived from the GPT-2 tokenizer. Peculiarities: Byte-level BPE
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP