GPT-2 option to avoid predicting special tokens
This commit is contained in:
@@ -263,8 +263,8 @@ class GPT2Tokenizer(object):
     def encode(self, text):
         return self.convert_tokens_to_ids(self.tokenize(text))
 
-    def decode(self, tokens):
-        text = ''.join([self.decoder[token] for token in tokens])
+    def decode(self, tokens, skip_special_tokens=False):
+        text = ''.join(self.convert_ids_to_tokens(tokens, skip_special_tokens=skip_special_tokens))
         text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors)
         return text
 
Reference in New Issue
Block a user