From 8ba4c5885f03e437edff225e53f8fd334ebc3819 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Wed, 29 Apr 2020 01:13:59 +0200
Subject: [PATCH] Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair (#3994)

* Allow a more backward compatible behavior of max_len_single_sentence and max_len_sentences_pair and

* The style and quality are now top-notch
---
 src/transformers/tokenization_utils.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index dee5b12ad5..d80c66596a 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -785,6 +785,30 @@ class PreTrainedTokenizer(SpecialTokensMixin):
     def max_len_sentences_pair(self):
         return self.model_max_length - self.num_special_tokens_to_add(pair=True)
 
+    @max_len_single_sentence.setter
+    def max_len_single_sentence(self, value):
+        """ For backward compatibility, allow to try to setup 'max_len_single_sentence' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=False):
+            logger.warning(
+                "Setting 'max_len_single_sentence' is now deprecated. " "This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_single_sentence' is now deprecated. " "This value is automatically set up."
+            )
+
+    @max_len_sentences_pair.setter
+    def max_len_sentences_pair(self, value):
+        """ For backward compatibility, allow to try to setup 'max_len_sentences_pair' """
+        if value == self.model_max_length - self.num_special_tokens_to_add(pair=True):
+            logger.warning(
+                "Setting 'max_len_sentences_pair' is now deprecated. " "This value is automatically set up."
+            )
+        else:
+            raise ValueError(
+                "Setting 'max_len_sentences_pair' is now deprecated. " "This value is automatically set up."
+            )
+
     def get_vocab(self):
         """ Returns the vocabulary as a dict of {token: index} pairs. `tokenizer.get_vocab()[token]` is equivalent to `tokenizer.convert_tokens_to_ids(token)` when `token` is in the vocab. """
         raise NotImplementedError()