From 7419d807ff3d2ca45757c9e3090388b721e131ce Mon Sep 17 00:00:00 2001
From: Thomas Wang <24695242+thomasw21@users.noreply.github.com>
Date: Fri, 20 Jan 2023 15:54:33 +0100
Subject: [PATCH] Declare __len__ method in PreTrainedTokenizerBase (#21210)

---
 src/transformers/tokenization_utils_base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 4ddb63c9e1..865a7a5654 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -1578,6 +1578,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
             f" special_tokens={self.special_tokens_map_extended})"
         )
 
+    def __len__(self) -> int:
+        raise NotImplementedError()
+
     def get_vocab(self) -> Dict[str, int]:
         """
         Returns the vocabulary as a dictionary of token to index.