From 1cd7c6f154014f62427ed3c59e14f6f503988431 Mon Sep 17 00:00:00 2001 From: YouJiacheng <1503679330@qq.com> Date: Mon, 1 Aug 2022 20:16:24 +0800 Subject: [PATCH] Fix from_pretrained kwargs passing (#18387) Fix #18385 I don't know whether `use_auth_token`, `cache_dir` and `local_files_only` should be passed to `(cls.slow_tokenizer_class)._from_pretrained`, but I guess they should. --- src/transformers/tokenization_utils_base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 7e259fce90..c8198c5ce8 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1809,6 +1809,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): *init_inputs, use_auth_token=use_auth_token, cache_dir=cache_dir, + local_files_only=local_files_only, **kwargs, ) @@ -1821,6 +1822,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): *init_inputs, use_auth_token=None, cache_dir=None, + local_files_only=False, **kwargs ): # We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json @@ -1833,6 +1835,9 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): pretrained_model_name_or_path, copy.deepcopy(init_configuration), *init_inputs, + use_auth_token=use_auth_token, + cache_dir=cache_dir, + local_files_only=local_files_only, **(copy.deepcopy(kwargs)), ) else: @@ -1864,6 +1869,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): pretrained_model_name_or_path, use_auth_token=use_auth_token, cache_dir=cache_dir, + local_files_only=local_files_only, ) config_tokenizer_class = config.tokenizer_class except (OSError, ValueError, KeyError):