Enable code-specific revision for code on the Hub (#23799)

* Enable code-specific revision for code on the Hub

* invalidate old revision
This commit is contained in:
Sylvain Gugger
2023-05-26 15:51:15 -04:00
committed by GitHub
parent edf7772826
commit 17a55534f5
7 changed files with 32 additions and 4 deletions

View File

@@ -316,7 +316,7 @@ def get_cached_module_file(
) )
new_files.append(f"{module_needed}.py") new_files.append(f"{module_needed}.py")
if len(new_files) > 0: if len(new_files) > 0 and revision is None:
new_files = "\n".join([f"- {f}" for f in new_files]) new_files = "\n".join([f"- {f}" for f in new_files])
repo_type_str = "" if repo_type is None else f"{repo_type}s/" repo_type_str = "" if repo_type is None else f"{repo_type}s/"
url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}" url = f"https://huggingface.co/{repo_type_str}{pretrained_model_name_or_path}"
@@ -340,6 +340,7 @@ def get_class_from_dynamic_module(
revision: Optional[str] = None, revision: Optional[str] = None,
local_files_only: bool = False, local_files_only: bool = False,
repo_type: Optional[str] = None, repo_type: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs, **kwargs,
): ):
""" """
@@ -391,6 +392,10 @@ def get_class_from_dynamic_module(
If `True`, will only try to load the tokenizer configuration from local files. If `True`, will only try to load the tokenizer configuration from local files.
repo_type (`str`, *optional*): repo_type (`str`, *optional*):
Specify the repo type (useful when downloading from a space for instance). Specify the repo type (useful when downloading from a space for instance).
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than the
rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based system for
storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier allowed by git.
<Tip> <Tip>
@@ -415,12 +420,12 @@ def get_class_from_dynamic_module(
# Catch the name of the repo if it's specified in `class_reference` # Catch the name of the repo if it's specified in `class_reference`
if "--" in class_reference: if "--" in class_reference:
repo_id, class_reference = class_reference.split("--") repo_id, class_reference = class_reference.split("--")
# Invalidate revision since it's not relevant for this repo
revision = "main"
else: else:
repo_id = pretrained_model_name_or_path repo_id = pretrained_model_name_or_path
module_file, class_name = class_reference.split(".") module_file, class_name = class_reference.split(".")
if code_revision is None and pretrained_model_name_or_path == repo_id:
code_revision = revision
# And lastly we get the class inside our newly created module # And lastly we get the class inside our newly created module
final_module = get_cached_module_file( final_module = get_cached_module_file(
repo_id, repo_id,
@@ -430,7 +435,7 @@ def get_class_from_dynamic_module(
resume_download=resume_download, resume_download=resume_download,
proxies=proxies, proxies=proxies,
use_auth_token=use_auth_token, use_auth_token=use_auth_token,
revision=revision, revision=code_revision,
local_files_only=local_files_only, local_files_only=local_files_only,
repo_type=repo_type, repo_type=repo_type,
) )

View File

@@ -128,6 +128,11 @@ FROM_PRETRAINED_TORCH_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
@@ -224,6 +229,11 @@ FROM_PRETRAINED_TF_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
@@ -320,6 +330,11 @@ FROM_PRETRAINED_FLAX_DOCSTRING = """
Whether or not to allow for custom models defined on the Hub in their own modeling files. This option Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
should only be set to `True` for repositories you trust and in which you have read the code, as it will should only be set to `True` for repositories you trust and in which you have read the code, as it will
execute code present on the Hub on your local machine. execute code present on the Hub on your local machine.
code_revision (`str`, *optional*, defaults to `"main"`):
The specific revision to use for the code on the Hub, if the code lives in a different repository than
the rest of the model. It can be a branch name, a tag name, or a commit id, since we use a git-based
system for storing models and other artifacts on huggingface.co, so `code_revision` can be any identifier
allowed by git.
kwargs (additional keyword arguments, *optional*): kwargs (additional keyword arguments, *optional*):
Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
`output_attentions=True`). Behaves differently depending on whether a `config` is provided or `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
@@ -408,6 +423,7 @@ class _BaseAutoModelClass:
else: else:
repo_id = config.name_or_path repo_id = config.name_or_path
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs) model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
_ = kwargs.pop("code_revision", None)
return model_class._from_config(config, **kwargs) return model_class._from_config(config, **kwargs)
elif type(config) in cls._model_mapping.keys(): elif type(config) in cls._model_mapping.keys():
model_class = _get_model_class(config, cls._model_mapping) model_class = _get_model_class(config, cls._model_mapping)
@@ -425,6 +441,7 @@ class _BaseAutoModelClass:
kwargs["_from_auto"] = True kwargs["_from_auto"] = True
hub_kwargs_names = [ hub_kwargs_names = [
"cache_dir", "cache_dir",
"code_revision",
"force_download", "force_download",
"local_files_only", "local_files_only",
"proxies", "proxies",
@@ -464,6 +481,7 @@ class _BaseAutoModelClass:
model_class = get_class_from_dynamic_module( model_class = get_class_from_dynamic_module(
class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs
) )
_ = hub_kwargs.pop("code_revision", None)
return model_class.from_pretrained( return model_class.from_pretrained(
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
) )

View File

@@ -938,6 +938,7 @@ class AutoConfig:
) )
class_ref = config_dict["auto_map"]["AutoConfig"] class_ref = config_dict["auto_map"]["AutoConfig"]
config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) config_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)
return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs) return config_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
elif "model_type" in config_dict: elif "model_type" in config_dict:
config_class = CONFIG_MAPPING[config_dict["model_type"]] config_class = CONFIG_MAPPING[config_dict["model_type"]]

View File

@@ -337,6 +337,7 @@ class AutoFeatureExtractor:
feature_extractor_class = get_class_from_dynamic_module( feature_extractor_class = get_class_from_dynamic_module(
feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs feature_extractor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class) feature_extractor_class = feature_extractor_class_from_name(feature_extractor_class)

View File

@@ -361,6 +361,7 @@ class AutoImageProcessor:
image_processor_class = get_class_from_dynamic_module( image_processor_class = get_class_from_dynamic_module(
image_processor_auto_map, pretrained_model_name_or_path, **kwargs image_processor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
image_processor_class = image_processor_class_from_name(image_processor_class) image_processor_class = image_processor_class_from_name(image_processor_class)

View File

@@ -259,6 +259,7 @@ class AutoProcessor:
processor_class = get_class_from_dynamic_module( processor_class = get_class_from_dynamic_module(
processor_auto_map, pretrained_model_name_or_path, **kwargs processor_auto_map, pretrained_model_name_or_path, **kwargs
) )
_ = kwargs.pop("code_revision", None)
else: else:
processor_class = processor_class_from_name(processor_class) processor_class = processor_class_from_name(processor_class)

View File

@@ -678,6 +678,7 @@ class AutoTokenizer:
else: else:
class_ref = tokenizer_auto_map[0] class_ref = tokenizer_auto_map[0]
tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs) tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
_ = kwargs.pop("code_revision", None)
elif use_fast and not config_tokenizer_class.endswith("Fast"): elif use_fast and not config_tokenizer_class.endswith("Fast"):
tokenizer_class_candidate = f"{config_tokenizer_class}Fast" tokenizer_class_candidate = f"{config_tokenizer_class}Fast"