Use code on the Hub from another repo (#22814)

* initial work

* Add other classes

* Refactor code

* Move warning and fix dynamic pipeline

* Issue warning when necessary

* Add test

* Do not skip auto tests

* Fix failing tests

* Refactor and address review comments

* Address review comments
This commit is contained in:
Sylvain Gugger
2023-04-18 13:46:11 -04:00
committed by GitHub
parent aec10d162f
commit 5f9b825c89
15 changed files with 124 additions and 66 deletions

View File

@@ -40,6 +40,7 @@ from .utils import (
PushToHubMixin,
TensorType,
add_end_docstrings,
add_model_info_to_auto_map,
cached_file,
copy_func,
download_url,
@@ -1817,6 +1818,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
cache_dir=cache_dir,
local_files_only=local_files_only,
_commit_hash=commit_hash,
_is_local=is_local,
**kwargs,
)
@@ -1831,6 +1833,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
cache_dir=None,
local_files_only=False,
_commit_hash=None,
_is_local=False,
**kwargs,
):
# We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
@@ -1861,7 +1864,6 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
# First attempt. We get tokenizer_class from tokenizer_config to check mismatch between tokenizers.
config_tokenizer_class = init_kwargs.get("tokenizer_class")
init_kwargs.pop("tokenizer_class", None)
init_kwargs.pop("auto_map", None)
saved_init_inputs = init_kwargs.pop("init_inputs", ())
if not init_inputs:
init_inputs = saved_init_inputs
@@ -1869,6 +1871,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
config_tokenizer_class = None
init_kwargs = init_configuration
if "auto_map" in init_kwargs and not _is_local:
# For backward compatibility with the old format (a bare list/tuple instead of a dict).
if isinstance(init_kwargs["auto_map"], (tuple, list)):
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
init_kwargs["auto_map"] = add_model_info_to_auto_map(
init_kwargs["auto_map"], pretrained_model_name_or_path
)
if config_tokenizer_class is None:
from .models.auto.configuration_auto import AutoConfig # tests_ignore