From 90f980ed356cc27b7faefc5e22abef5b7ded62bb Mon Sep 17 00:00:00 2001 From: Yuta Hayashibe Date: Fri, 1 Oct 2021 22:29:08 +0900 Subject: [PATCH] Fix warning situation: "UserWarning: max_length is ignored when padding=True" (#13829) * Removed wrong warning * Raise a warning when `max_length` is given with wrong `truncation` * Update the error message * Update the warning message Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/tokenization_utils_base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 0529962bff..aafb6c28be 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2223,8 +2223,11 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): elif padding is not False: if padding is True: if verbose: - if max_length is not None: - warnings.warn("`max_length` is ignored when `padding`=`True`.") + if max_length is not None and (truncation is False or truncation == "do_not_truncate"): + warnings.warn( + "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. " + "To pad to max length, use `padding='max_length'`." + ) if old_pad_to_max_length is not False: warnings.warn("Though `pad_to_max_length` = `True`, it is ignored because `padding`=`True`.") padding_strategy = PaddingStrategy.LONGEST # Default to pad to the longest sequence in the batch