From 777faa8ae7d9232b3b5ed1d6c7cb11dca3d744c3 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Tue, 22 Oct 2019 11:26:42 -0400 Subject: [PATCH] Fix #1597 --- transformers/tokenization_ctrl.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/transformers/tokenization_ctrl.py b/transformers/tokenization_ctrl.py index 2406fa256b..c8d67ad043 100644 --- a/transformers/tokenization_ctrl.py +++ b/transformers/tokenization_ctrl.py @@ -63,11 +63,7 @@ def get_pairs(word): class CTRLTokenizer(PreTrainedTokenizer): """ CTRL BPE tokenizer. Peculiarities: - - Byte-level Byte-Pair-Encoding - - Requires a space to start the input string => the encoding methods should be called with the - ``add_prefix_space`` flag set to ``True``. - Otherwise, this tokenizer ``encode`` and ``decode`` method will not conserve - the absence of a space at the beginning of a string: `tokenizer.decode(tokenizer.encode("Hello")) = " Hello"` + - Byte-Pair-Encoding """ vocab_files_names = VOCAB_FILES_NAMES pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP