Use code on the Hub from another repo (#22698)
* Initial work
* Add other classes
* Refactor code
* Move warning and fix dynamic pipeline
* Issue warning when necessary
* Add test
This commit is contained in:
@@ -1817,6 +1817,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
cache_dir=cache_dir,
|
||||
local_files_only=local_files_only,
|
||||
_commit_hash=commit_hash,
|
||||
_is_local=is_local,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -1831,6 +1832,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
cache_dir=None,
|
||||
local_files_only=False,
|
||||
_commit_hash=None,
|
||||
_is_local=False,
|
||||
**kwargs,
|
||||
):
|
||||
# We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
|
||||
@@ -1861,7 +1863,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
# First attempt. We get tokenizer_class from tokenizer_config to check mismatch between tokenizers.
|
||||
config_tokenizer_class = init_kwargs.get("tokenizer_class")
|
||||
init_kwargs.pop("tokenizer_class", None)
|
||||
init_kwargs.pop("auto_map", None)
|
||||
saved_init_inputs = init_kwargs.pop("init_inputs", ())
|
||||
if not init_inputs:
|
||||
init_inputs = saved_init_inputs
|
||||
@@ -1869,6 +1870,15 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
config_tokenizer_class = None
|
||||
init_kwargs = init_configuration
|
||||
|
||||
if "auto_map" in init_kwargs and not _is_local:
|
||||
new_auto_map = {}
|
||||
for key, value in init_kwargs["auto_map"].items():
|
||||
if isinstance(value, (list, tuple)):
|
||||
new_auto_map[key] = [f"{pretrained_model_name_or_path}--{v}" for v in value]
|
||||
else:
|
||||
new_auto_map[key] = f"{pretrained_model_name_or_path}--{value}"
|
||||
init_kwargs["auto_map"] = new_auto_map
|
||||
|
||||
if config_tokenizer_class is None:
|
||||
from .models.auto.configuration_auto import AutoConfig # tests_ignore
|
||||
|
||||
|
||||
Reference in New Issue
Block a user