Warn the user that max_len is on the path to deprecation. (#4528)

* Warn the user that max_len is on the path to deprecation.

* Ensure better backward compatibility when max_len is provided to a tokenizer.

* Make sure to override the parameter and not the actual instance value.

* Format & quality
This commit is contained in:
Funtowicz Morgan
2020-05-22 22:08:30 +00:00
committed by GitHub
parent 0f6969b7e9
commit 996f393a86

View File

@@ -22,6 +22,7 @@ import logging
import operator
import os
import re
import warnings
from collections import UserDict, defaultdict
from contextlib import contextmanager
from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union
@@ -822,7 +823,14 @@ class PreTrainedTokenizer(SpecialTokensMixin):
super().__init__(**kwargs)
# For backward compatibility, we fall back to setting model_max_length from max_len if provided
model_max_length = model_max_length if model_max_length is not None else kwargs.pop("max_len", None)
if "max_len" in kwargs:
warnings.warn(
"Parameter max_len is deprecated and will be removed in a future release. "
"Use model_max_length instead.",
category=FutureWarning,
)
model_max_length = kwargs.pop("max_len")
self.model_max_length = model_max_length if model_max_length is not None else VERY_LARGE_INTEGER
# Padding side is right by default and overridden in subclasses. If specified in the kwargs, it is changed.