From 996f393a86a3b472687759e9e9c188676d0e956b Mon Sep 17 00:00:00 2001
From: Funtowicz Morgan
Date: Fri, 22 May 2020 22:08:30 +0000
Subject: [PATCH] Warn the user about max_len being on the path to be deprecated. (#4528)

* Warn the user about max_len being on the path to be deprecated.

* Ensure better backward compatibility when max_len is provided to a tokenizer.

* Make sure to override the parameter and not the actual instance value.

* Format & quality
---
 src/transformers/tokenization_utils.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py
index 5e8d7df009..ef9079540a 100644
--- a/src/transformers/tokenization_utils.py
+++ b/src/transformers/tokenization_utils.py
@@ -22,6 +22,7 @@ import logging
 import operator
 import os
 import re
+import warnings
 from collections import UserDict, defaultdict
 from contextlib import contextmanager
 from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union
@@ -822,7 +823,14 @@ class PreTrainedTokenizer(SpecialTokensMixin):
         super().__init__(**kwargs)
 
         # For backward compatibility we fallback to set model_max_length from max_len if provided
-        model_max_length = model_max_length if model_max_length is not None else kwargs.pop("max_len", None)
+        if "max_len" in kwargs:
+            warnings.warn(
+                "Parameter max_len is deprecated and will be removed in a future release. "
+                "Use model_max_length instead.",
+                category=FutureWarning,
+            )
+
+            model_max_length = kwargs.pop("max_len")
         self.model_max_length = model_max_length if model_max_length is not None else VERY_LARGE_INTEGER
 
         # Padding side is right by default and overridden in subclasses. If specified in the kwargs, it is changed.