fix tokenization
This commit is contained in:
@@ -205,7 +205,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
|
||||
|
||||
def convert_tokens_to_string(self, tokens):
|
||||
""" Converts a sequence of tokens (string) in a single string. """
|
||||
-out_string = ''.join(tokens).replace('@@', ' ').strip()
|
||||
+out_string = ' '.join(tokens).replace('@@ ', '').strip()
|
||||
return out_string
|
||||
|
||||
def save_vocabulary(self, save_directory):
|
||||
|
||||
Reference in New Issue
Block a user