From 7bd865051237137ec1df666034595408a7e38e24 Mon Sep 17 00:00:00 2001
From: Maria Khalusova <kafooster@gmail.com>
Date: Mon, 20 Mar 2023 14:18:55 -0400
Subject: [PATCH] Example of pad_to_multiple_of for padding and truncation
 guide & docstring update (#22278)

* added an example of pad_to_multiple_of

* make style

* addressed feedback
---
 docs/source/en/pad_truncation.mdx           | 1 +
 src/transformers/tokenization_utils_base.py | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/pad_truncation.mdx b/docs/source/en/pad_truncation.mdx
index f848e23bed..8862e0be00 100644
--- a/docs/source/en/pad_truncation.mdx
+++ b/docs/source/en/pad_truncation.mdx
@@ -50,6 +50,7 @@ The following table summarizes the recommended way to setup padding and truncati
 |                                      |                                   | `tokenizer(batch_sentences, padding='longest')`                                        |
 |                                      | padding to max model input length | `tokenizer(batch_sentences, padding='max_length')`                                     |
 |                                      | padding to specific length        | `tokenizer(batch_sentences, padding='max_length', max_length=42)`                      |
+|                                      | padding to a multiple of a value  | `tokenizer(batch_sentences, padding=True, pad_to_multiple_of=8)`                       |
 | truncation to max model input length | no padding                        | `tokenizer(batch_sentences, truncation=True)` or                                       |
 |                                      |                                   | `tokenizer(batch_sentences, truncation=STRATEGY)`                                      |
 |                                      | padding to max sequence in batch  | `tokenizer(batch_sentences, padding=True, truncation=True)` or                         |
diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index eb52ef0adb..66164c2778 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1342,8 +1342,9 @@ ENCODE_KWARGS_DOCSTRING = r"""
                 tokenizer assumes the input is already split into words (for instance, by splitting it on whitespace)
                 which it will tokenize. This is useful for NER or token classification.
             pad_to_multiple_of (`int`, *optional*):
-                If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
-                the use of Tensor Cores on NVIDIA hardware with compute capability `>= 7.5` (Volta).
+                If set, will pad the sequence to a multiple of the provided value. Requires `padding` to be activated.
+                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
+                `>= 7.0` (Volta).
             return_tensors (`str` or [`~utils.TensorType`], *optional*):
                 If set, will return tensors instead of list of python integers. Acceptable values are: