Make max_len_single_sentence & max_len_sentences_pair plain attributes so they can be modified

2019-08-23 22:07:26 +02:00
parent ab7bd5ef98
commit 3bcbebd440
8 changed files with 26 additions and 40 deletions
--- a/pytorch_transformers/tokenization_roberta.py
+++ b/pytorch_transformers/tokenization_roberta.py
@@ -77,6 +77,9 @@ class RobertaTokenizer(PreTrainedTokenizer):
                                               sep_token=sep_token, cls_token=cls_token, pad_token=pad_token,
                                               mask_token=mask_token, **kwargs)

+        self.max_len_single_sentence = self.max_len - 2  # take into account special tokens
+        self.max_len_sentences_pair = self.max_len - 4  # take into account special tokens
+
        self.encoder = json.load(open(vocab_file, encoding="utf-8"))
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.errors = errors  # how to handle errors in decoding
@@ -160,14 +163,6 @@ class RobertaTokenizer(PreTrainedTokenizer):
        text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors=self.errors)
        return text

-    @property
-    def max_len_single_sentence(self):
-        return self.max_len - 2  # take into account special tokens
-
-    @property
-    def max_len_sentences_pair(self):
-        return self.max_len - 4  # take into account special tokens
-
    def add_special_tokens_single_sentence(self, token_ids):
        """
        Adds special tokens to a sequence for sequence classification tasks.