From 7a22a02a7032c6fcecaad47363d84f98e239da79 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Tue, 13 Jul 2021 09:19:04 -0700 Subject: [PATCH] [tokenizer.prepare_seq2seq_batch] change deprecation to be easily actionable (#12669) * change deprecation to be easily actionable * Update src/transformers/tokenization_utils_base.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * rework as suggested * one warning together * fix format Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/tokenization_utils_base.py | 24 +++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index e9ac2389c1..99fa84ad8e 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -3274,13 +3274,23 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): The full set of keys ``[input_ids, attention_mask, labels]``, will only be returned if tgt_texts is passed. Otherwise, input_ids, attention_mask will be the only keys. """ - warnings.warn( - "`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the " - "regular `__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` " - "context manager to prepare your targets. See the documentation of your specific tokenizer for more " - "details", - FutureWarning, - ) + # docstyle-ignore + formatted_warning = """ +`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular +`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare +your targets. + +Here is a short example: + +model_inputs = tokenizer(src_texts, ...) +with tokenizer.as_target_tokenizer(): + labels = tokenizer(tgt_texts, ...) +model_inputs["labels"] = labels["input_ids"] + +See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice. +For a more complete example, see the implementation of `prepare_seq2seq_batch`. +""" + warnings.warn(formatted_warning, FutureWarning) # mBART-specific kwargs that should be ignored by other models. kwargs.pop("src_lang", None) kwargs.pop("tgt_lang", None)