Use code on the Hub from another repo (#22814)
* initial work * Add other classes * Refactor code * Move warning and fix dynamic pipeline * Issue warning when necessary * Add test * Do not skip auto tests * Fix failing tests * Refactor and address review comments * Address review comments
This commit is contained in:
@@ -40,6 +40,7 @@ from .utils import (
|
||||
PushToHubMixin,
|
||||
TensorType,
|
||||
add_end_docstrings,
|
||||
add_model_info_to_auto_map,
|
||||
cached_file,
|
||||
copy_func,
|
||||
download_url,
|
||||
@@ -1817,6 +1818,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
cache_dir=cache_dir,
|
||||
local_files_only=local_files_only,
|
||||
_commit_hash=commit_hash,
|
||||
_is_local=is_local,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -1831,6 +1833,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
cache_dir=None,
|
||||
local_files_only=False,
|
||||
_commit_hash=None,
|
||||
_is_local=False,
|
||||
**kwargs,
|
||||
):
|
||||
# We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
|
||||
@@ -1861,7 +1864,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
# First attempt. We get tokenizer_class from tokenizer_config to check mismatch between tokenizers.
|
||||
config_tokenizer_class = init_kwargs.get("tokenizer_class")
|
||||
init_kwargs.pop("tokenizer_class", None)
|
||||
init_kwargs.pop("auto_map", None)
|
||||
saved_init_inputs = init_kwargs.pop("init_inputs", ())
|
||||
if not init_inputs:
|
||||
init_inputs = saved_init_inputs
|
||||
@@ -1869,6 +1871,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
config_tokenizer_class = None
|
||||
init_kwargs = init_configuration
|
||||
|
||||
if "auto_map" in init_kwargs and not _is_local:
|
||||
# For backward compatibility with old format.
|
||||
if isinstance(init_kwargs["auto_map"], (tuple, list)):
|
||||
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
|
||||
init_kwargs["auto_map"] = add_model_info_to_auto_map(
|
||||
init_kwargs["auto_map"], pretrained_model_name_or_path
|
||||
)
|
||||
|
||||
if config_tokenizer_class is None:
|
||||
from .models.auto.configuration_auto import AutoConfig # tests_ignore
|
||||
|
||||
|
||||
Reference in New Issue
Block a user