Expose max_len_single_sentence & max_len_sentences_pair as attributes so they can be modified
This commit is contained in:
@@ -77,6 +77,9 @@ class RobertaTokenizer(PreTrainedTokenizer):
|
||||
sep_token=sep_token, cls_token=cls_token, pad_token=pad_token,
|
||||
mask_token=mask_token, **kwargs)
|
||||
|
||||
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
|
||||
self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens
|
||||
|
||||
self.encoder = json.load(open(vocab_file, encoding="utf-8"))
|
||||
self.decoder = {v: k for k, v in self.encoder.items()}
|
||||
self.errors = errors # how to handle errors in decoding
|
||||
@@ -160,14 +163,6 @@ class RobertaTokenizer(PreTrainedTokenizer):
|
||||
text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors)
|
||||
return text
|
||||
|
||||
@property
|
||||
def max_len_single_sentence(self):
|
||||
return self.max_len - 2 # take into account special tokens
|
||||
|
||||
@property
|
||||
def max_len_sentences_pair(self):
|
||||
return self.max_len - 4 # take into account special tokens
|
||||
|
||||
def add_special_tokens_single_sentence(self, token_ids):
|
||||
"""
|
||||
Adds special tokens to a sequence for sequence classification tasks.
|
||||
|
||||
Reference in New Issue
Block a user