Enable code-specific revision for code on the Hub (#23799)
* Enable code-specific revision for code on the Hub * invalidate old revision
This commit is contained in:
@@ -316,7 +316,7 @@ def get_cached_module_file(
|
|||||||
)
|
)
|
||||||
new_files.append(f"{module_needed}.py")
|
new_files.append(f"{module_needed}.py")
|
||||||
|
|
||||||
if len(new_files) > 0:
|
if len(new_files) > 0 and revision is None:
|
||||||
new_files = "\n".join([f"- {f}" for f in new_files])
|
new_files = "\n".join([f"- {f}" for f in new_files])
|
||||||
repo_type_str = "" if repo_type is None else f"{repo_type}s/"
|
repo_type_str = "" if repo_type is None else f"{repo_type}s/"
|
||||||
url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}"
|
url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}"
|
||||||
@@ -340,6 +340,7 @@ def get_class_from_dynamic_module(
|
|||||||
revision: Optional[str] = None,
|
revision: Optional[str] = None,
|
||||||
local_files_only: bool = False,
|
local_files_only: bool = False,
|
||||||
repo_type: Optional[str] = None,
|
repo_type: Optional[str] = None,
|
||||||
|
code_revision: Optional[str] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -391,6 +392,10 @@ def get_class_from_dynamic_module(
|
|||||||
If `True`, will only try to load the tokenizer configuration from local files.
|
If `True`, will only try to load the tokenizer configuration from local files.
|
||||||
repo_type (`str`, *optional*):
|
repo_type (`str`, *optional*):
|
||||||
Specify the repo type (useful when downloading from a space for instance).
|
Specify the repo type (useful when downloading from a space for instance).
|
||||||
|
code_revision (`str`, *optional*, defaults to `"main"`):
|
||||||
|
The specific revision to use for the code on the Hub, if the code lives in a different repository than the
|
||||||
|
rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based system for
|
||||||
|
storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier allowed by git.
|
||||||
|
|
||||||
<Tip>
|
<Tip>
|
||||||
|
|
||||||
@@ -415,12 +420,12 @@ def get_class_from_dynamic_module(
|
|||||||
# Catch the name of the repo if it's specified in `class_reference`
|
# Catch the name of the repo if it's specified in `class_reference`
|
||||||
if "--" in class_reference:
|
if "--" in class_reference:
|
||||||
repo_id, class_reference = class_reference.split("--")
|
repo_id, class_reference = class_reference.split("--")
|
||||||
# Invalidate revision since it's not relevant for this repo
|
|
||||||
revision = "main"
|
|
||||||
else:
|
else:
|
||||||
repo_id = pretrained_model_name_or_path
|
repo_id = pretrained_model_name_or_path
|
||||||
module_file, class_name = class_reference.split(".")
|
module_file, class_name = class_reference.split(".")
|
||||||
|
|
||||||
|
if code_revision is None and pretrained_model_name_or_path == repo_id:
|
||||||
|
code_revision = revision
|
||||||
# And lastly we get the class inside our newly created module
|
# And lastly we get the class inside our newly created module
|
||||||
final_module = get_cached_module_file(
|
final_module = get_cached_module_file(
|
||||||
repo_id,
|
repo_id,
|
||||||
@@ -430,7 +435,7 @@ def get_class_from_dynamic_module(
|
|||||||
resume_download=resume_download,
|
resume_download=resume_download,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
use_auth_token=use_auth_token,
|
use_auth_token=use_auth_token,
|
||||||
revision=revision,
|
revision=code_revision,
|
||||||
local_files_only=local_files_only,
|
local_files_only=local_files_only,
|
||||||
repo_type=repo_type,
|
repo_type=repo_type,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -128,6 +128,11 @@ FROM_PRETRAINED_TORCH_DOCSTRING = """
|
|||||||
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
||||||
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
||||||
execute code present on the Hub on your local machine.
|
execute code present on the Hub on your local machine.
|
||||||
|
code_revision (`str`, *optional*, defaults to `"main"`):
|
||||||
|
The specific revision to use for the code on the Hub, if the code lives in a different repository than
|
||||||
|
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
|
||||||
|
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
|
||||||
|
allowed by git.
|
||||||
kwargs (additional keyword arguments, *optional*):
|
kwargs (additional keyword arguments, *optional*):
|
||||||
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
||||||
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
||||||
@@ -224,6 +229,11 @@ FROM_PRETRAINED_TF_DOCSTRING = """
|
|||||||
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
||||||
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
||||||
execute code present on the Hub on your local machine.
|
execute code present on the Hub on your local machine.
|
||||||
|
code_revision (`str`, *optional*, defaults to `"main"`):
|
||||||
|
The specific revision to use for the code on the Hub, if the code lives in a different repository than
|
||||||
|
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
|
||||||
|
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
|
||||||
|
allowed by git.
|
||||||
kwargs (additional keyword arguments, *optional*):
|
kwargs (additional keyword arguments, *optional*):
|
||||||
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
||||||
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
||||||
@@ -320,6 +330,11 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """
|
|||||||
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
|
||||||
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
should only be set to `True` for repositories you trust and in which you have read the code, as it will
|
||||||
execute code present on the Hub on your local machine.
|
execute code present on the Hub on your local machine.
|
||||||
|
code_revision (`str`, *optional*, defaults to `"main"`):
|
||||||
|
The specific revision to use for the code on the Hub, if the code lives in a different repository than
|
||||||
|
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
|
||||||
|
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
|
||||||
|
allowed by git.
|
||||||
kwargs (additional keyword arguments, *optional*):
|
kwargs (additional keyword arguments, *optional*):
|
||||||
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
|
||||||
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
|
||||||
@@ -408,6 +423,7 @@ class _BaseAutoModelClass:
|
|||||||
else:
|
else:
|
||||||
repo_id = config.name_or_path
|
repo_id = config.name_or_path
|
||||||
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
|
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
return model_class._from_config(config, **kwargs)
|
return model_class._from_config(config, **kwargs)
|
||||||
elif type(config) in cls._model_mapping.keys():
|
elif type(config) in cls._model_mapping.keys():
|
||||||
model_class = _get_model_class(config, cls._model_mapping)
|
model_class = _get_model_class(config, cls._model_mapping)
|
||||||
@@ -425,6 +441,7 @@ class _BaseAutoModelClass:
|
|||||||
kwargs["_from_auto"] = True
|
kwargs["_from_auto"] = True
|
||||||
hub_kwargs_names = [
|
hub_kwargs_names = [
|
||||||
"cache_dir",
|
"cache_dir",
|
||||||
|
"code_revision",
|
||||||
"force_download",
|
"force_download",
|
||||||
"local_files_only",
|
"local_files_only",
|
||||||
"proxies",
|
"proxies",
|
||||||
@@ -464,6 +481,7 @@ class _BaseAutoModelClass:
|
|||||||
model_class = get_class_from_dynamic_module(
|
model_class = get_class_from_dynamic_module(
|
||||||
class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs
|
class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs
|
||||||
)
|
)
|
||||||
|
_ = hub_kwargs.pop("code_revision", None)
|
||||||
return model_class.from_pretrained(
|
return model_class.from_pretrained(
|
||||||
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
|
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -938,6 +938,7 @@ class AutoConfig:
|
|||||||
)
|
)
|
||||||
class_ref = config_dict["auto_map"]["AutoConfig"]
|
class_ref = config_dict["auto_map"]["AutoConfig"]
|
||||||
config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
|
config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
||||||
elif "model_type" in config_dict:
|
elif "model_type" in config_dict:
|
||||||
config_class = CONFIG_MAPPING[config_dict["model_type"]]
|
config_class = CONFIG_MAPPING[config_dict["model_type"]]
|
||||||
|
|||||||
@@ -337,6 +337,7 @@ class AutoFeatureExtractor:
|
|||||||
feature_extractor_class = get_class_from_dynamic_module(
|
feature_extractor_class = get_class_from_dynamic_module(
|
||||||
feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs
|
feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs
|
||||||
)
|
)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
else:
|
else:
|
||||||
feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)
|
feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)
|
||||||
|
|
||||||
|
|||||||
@@ -361,6 +361,7 @@ class AutoImageProcessor:
|
|||||||
image_processor_class = get_class_from_dynamic_module(
|
image_processor_class = get_class_from_dynamic_module(
|
||||||
image_processor_auto_map, pretrained_model_name_or_path, **kwargs
|
image_processor_auto_map, pretrained_model_name_or_path, **kwargs
|
||||||
)
|
)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
else:
|
else:
|
||||||
image_processor_class = image_processor_class_from_name(image_processor_class)
|
image_processor_class = image_processor_class_from_name(image_processor_class)
|
||||||
|
|
||||||
|
|||||||
@@ -259,6 +259,7 @@ class AutoProcessor:
|
|||||||
processor_class = get_class_from_dynamic_module(
|
processor_class = get_class_from_dynamic_module(
|
||||||
processor_auto_map, pretrained_model_name_or_path, **kwargs
|
processor_auto_map, pretrained_model_name_or_path, **kwargs
|
||||||
)
|
)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
else:
|
else:
|
||||||
processor_class = processor_class_from_name(processor_class)
|
processor_class = processor_class_from_name(processor_class)
|
||||||
|
|
||||||
|
|||||||
@@ -678,6 +678,7 @@ class AutoTokenizer:
|
|||||||
else:
|
else:
|
||||||
class_ref = tokenizer_auto_map[0]
|
class_ref = tokenizer_auto_map[0]
|
||||||
tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
|
tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
|
||||||
|
_ = kwargs.pop("code_revision", None)
|
||||||
|
|
||||||
elif use_fast and not config_tokenizer_class.endswith("Fast"):
|
elif use_fast and not config_tokenizer_class.endswith("Fast"):
|
||||||
tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
|
tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
|
||||||
|
|||||||
Reference in New Issue
Block a user