From 342ff6eb4152d84ba3f381365f8fc8706460e8a8 Mon Sep 17 00:00:00 2001 From: Jia Date: Mon, 28 Mar 2022 17:19:12 +0800 Subject: [PATCH] Update comments in class BatchEncoding (#15932) --- src/transformers/tokenization_utils_base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index c7f76c91dd..c76197f544 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -160,16 +160,17 @@ class TokenSpan(NamedTuple): class BatchEncoding(UserDict): """ - Holds the output of the [`~tokenization_utils_base.PreTrainedTokenizerBase.encode_plus`] and - [`~tokenization_utils_base.PreTrainedTokenizerBase.batch_encode`] methods (tokens, attention_masks, etc). + Holds the output of the [`~tokenization_utils_base.PreTrainedTokenizerBase.__call__`], + [`~tokenization_utils_base.PreTrainedTokenizerBase.encode_plus`] and + [`~tokenization_utils_base.PreTrainedTokenizerBase.batch_encode_plus`] methods (tokens, attention_masks, etc). This class is derived from a python dictionary and can be used as a dictionary. In addition, this class exposes utility methods to map from word/character space to token space. Args: data (`dict`): - Dictionary of lists/arrays/tensors returned by the encode/batch_encode methods ('input_ids', - 'attention_mask', etc.). + Dictionary of lists/arrays/tensors returned by the `__call__`/`encode_plus`/`batch_encode_plus` methods + ('input_ids', 'attention_mask', etc.). encoding (`tokenizers.Encoding` or `Sequence[tokenizers.Encoding]`, *optional*): If the tokenizer is a fast tokenizer which outputs additional information like mapping from word/character space to token space the `tokenizers.Encoding` instance or list of instance (for batches) hold this