Fix GPT2 docstring
This commit is contained in:
committed by
Lysandre Debut
parent
fa963ecc59
commit
07bf43074f
@@ -107,10 +107,10 @@ class GPT2Tokenizer(PreTrainedTokenizer):
|
|||||||
"""
|
"""
|
||||||
GPT-2 BPE tokenizer. Peculiarities:
|
GPT-2 BPE tokenizer. Peculiarities:
|
||||||
- Byte-level Byte-Pair-Encoding
|
- Byte-level Byte-Pair-Encoding
|
||||||
- Requires a space to start the input string => the encoding methods should be called with the
|
- Requires a space to start the input string => the encoding and tokenize methods should be called with the
|
||||||
``add_prefix_space`` flag set to ``True``.
|
``add_prefix_space`` flag set to ``True``.
|
||||||
Otherwise, this tokenizer ``encode`` and ``decode`` method will not conserve
|
Otherwise, this tokenizer's ``encode``, ``decode``, and ``tokenize`` methods will not preserve
|
||||||
the absence of a space at the beginning of a string: `tokenizer.decode(tokenizer.encode("Hello")) = " Hello"`
|
the spaces at the beginning of a string: ``tokenizer.decode(tokenizer.encode(" Hello")) == "Hello"``
|
||||||
"""
|
"""
|
||||||
vocab_files_names = VOCAB_FILES_NAMES
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
|
||||||
|
|||||||
Reference in New Issue
Block a user