fix tokenization

This commit is contained in:
thomwolf
2019-10-08 17:19:28 +02:00
parent 03c2c762a6
commit 248314772f
2 changed files with 2 additions and 2 deletions

View File

@@ -205,7 +205,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens):
    """Convert a sequence of string tokens into a single string.

    Tokens are joined with single spaces, after which every ``'@@ '``
    sequence is deleted so that a token ending in ``'@@'`` is glued
    directly onto the token that follows it. Leading and trailing
    whitespace is stripped from the result.

    Args:
        tokens: Iterable of ``str`` tokens.

    Returns:
        The reassembled string (``''`` for an empty sequence).
    """
    # Join first, then remove the marker together with the space the join
    # inserted after it. A '@@' on the very last token has no following
    # space and is therefore left in place.
    return ' '.join(tokens).replace('@@ ', '').strip()
def save_vocabulary(self, save_directory):