From da72ac6e260487c9e480d15592318c19a5227499 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Sat, 17 Jul 2021 15:52:33 +0200 Subject: [PATCH] Fix push_to_hub docstring and make it appear in doc (#12770) --- docs/source/main_classes/configuration.rst | 1 + docs/source/main_classes/model.rst | 3 ++ docs/source/main_classes/tokenizer.rst | 12 +++---- src/transformers/configuration_utils.py | 16 ++++++++- src/transformers/file_utils.py | 36 +++++++++------------ src/transformers/modeling_flax_utils.py | 7 ++++ src/transformers/modeling_tf_utils.py | 8 +++++ src/transformers/modeling_utils.py | 8 +++++ src/transformers/tokenization_utils_base.py | 8 +++++ 9 files changed, 70 insertions(+), 29 deletions(-) diff --git a/docs/source/main_classes/configuration.rst b/docs/source/main_classes/configuration.rst index 1f39f77180..464160a9c6 100644 --- a/docs/source/main_classes/configuration.rst +++ b/docs/source/main_classes/configuration.rst @@ -22,4 +22,5 @@ PretrainedConfig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.PretrainedConfig + :special-members: push_to_hub :members: diff --git a/docs/source/main_classes/model.rst b/docs/source/main_classes/model.rst index d3bb0e2326..4633315a28 100644 --- a/docs/source/main_classes/model.rst +++ b/docs/source/main_classes/model.rst @@ -35,6 +35,7 @@ PreTrainedModel ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.PreTrainedModel + :special-members: push_to_hub :members: @@ -80,6 +81,7 @@ TFPreTrainedModel ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.TFPreTrainedModel + :special-members: push_to_hub :members: @@ -94,6 +96,7 @@ FlaxPreTrainedModel ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.FlaxPreTrainedModel + :special-members: push_to_hub :members: diff --git a/docs/source/main_classes/tokenizer.rst b/docs/source/main_classes/tokenizer.rst index 26cde90b32..8ef1ac56ba 100644 --- a/docs/source/main_classes/tokenizer.rst +++ b/docs/source/main_classes/tokenizer.rst @@ -53,10 +53,8 @@ PreTrainedTokenizer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: transformers.PreTrainedTokenizer - :special-members: __call__ - :members: batch_decode, convert_ids_to_tokens, convert_tokens_to_ids, convert_tokens_to_string, decode, encode, - get_added_vocab, get_special_tokens_mask, num_special_tokens_to_add, prepare_for_tokenization, tokenize, - vocab_size + :special-members: __call__, batch_decode, decode, encode, push_to_hub + :members: PreTrainedTokenizerFast @@ -68,10 +66,8 @@ loaded very simply into 🤗 transformers. Take a look at the :doc:`Using tokeni <../fast_tokenizers>` page to understand how this is done. .. autoclass:: transformers.PreTrainedTokenizerFast - :special-members: __call__ - :members: batch_decode, convert_ids_to_tokens, convert_tokens_to_ids, convert_tokens_to_string, decode, encode, - get_added_vocab, get_special_tokens_mask, num_special_tokens_to_add, - set_truncation_and_padding,tokenize, vocab_size + :special-members: __call__, batch_decode, decode, encode, push_to_hub + :members: BatchEncoding diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 5490b5d611..a818cca8d4 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -22,7 +22,15 @@ import os from typing import Any, Dict, Tuple, Union from . import __version__ -from .file_utils import CONFIG_NAME, PushToHubMixin, cached_path, hf_bucket_url, is_offline_mode, is_remote_url +from .file_utils import ( + CONFIG_NAME, + PushToHubMixin, + cached_path, + copy_func, + hf_bucket_url, + is_offline_mode, + is_remote_url, +) from .utils import logging @@ -729,3 +737,9 @@ class PretrainedConfig(PushToHubMixin): ) setattr(self, k, v) + + +PretrainedConfig.push_to_hub = copy_func(PretrainedConfig.push_to_hub) +PretrainedConfig.push_to_hub.__doc__ = PretrainedConfig.push_to_hub.__doc__.format( + object="config", object_class="AutoConfig", object_files="configuration file" +) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 1b53e46699..52af49344a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -1991,14 +1991,14 @@ class PushToHubMixin: use_auth_token: Optional[Union[bool, str]] = None, ) -> str: """ - Upload model checkpoint or tokenizer files to the 🤗 Model Hub while synchronizing a local clone of the repo in + Upload the {object_files} to the 🤗 Model Hub while synchronizing a local clone of the repo in :obj:`repo_path_or_name`. Parameters: repo_path_or_name (:obj:`str`, `optional`): - Can either be a repository name for your model or tokenizer in the Hub or a path to a local folder (in - which case the repository will have the name of that local folder). If not specified, will default to - the name given by :obj:`repo_url` and a local directory with that name will be created. + Can either be a repository name for your {object} in the Hub or a path to a local folder (in which case + the repository will have the name of that local folder). If not specified, will default to the name + given by :obj:`repo_url` and a local directory with that name will be created. repo_url (:obj:`str`, `optional`): Specify this in case you want to push to an existing repository in the hub. If unspecified, a new repository will be created in your namespace (unless you specify an :obj:`organization`) with @@ -2008,11 +2008,9 @@ class PushToHubMixin: the current working directory. This will slow things down if you are making changes in an existing repo since you will need to clone the repo before every push. commit_message (:obj:`str`, `optional`): - Message to commit while pushing. Will default to :obj:`"add config"`, :obj:`"add tokenizer"` or - :obj:`"add model"` depending on the type of the class. + Message to commit while pushing. Will default to :obj:`"add {object}"`. organization (:obj:`str`, `optional`): - Organization in which you want to push your model or tokenizer (you must be a member of this - organization). + Organization in which you want to push your {object} (you must be a member of this organization). private (:obj:`bool`, `optional`): Whether or not the repository created should be private (requires a paying subscription). use_auth_token (:obj:`bool` or :obj:`str`, `optional`): @@ -2022,29 +2020,27 @@ class PushToHubMixin: Returns: - The url of the commit of your model in the given repository. + :obj:`str`: The url of the commit of your {object} in the given repository. Examples:: - # Upload a model to the Hub: - from transformers import AutoModel + from transformers import {object_class} - model = BertModel.from_pretrained("bert-base-cased") - # Fine-tuning code + {object} = {object_class}.from_pretrained("bert-base-cased") - # Push the model to your namespace with the name "my-finetuned-bert" and have a local clone in the + # Push the {object} to your namespace with the name "my-finetuned-bert" and have a local clone in the # `my-finetuned-bert` folder. - model.push_to_hub("my-finetuned-bert") + {object}.push_to_hub("my-finetuned-bert") - # Push the model to your namespace with the name "my-finetuned-bert" with no local clone. - model.push_to_hub("my-finetuned-bert", use_temp_dir=True) + # Push the {object} to your namespace with the name "my-finetuned-bert" with no local clone. + {object}.push_to_hub("my-finetuned-bert", use_temp_dir=True) - # Push the model to an organization with the name "my-finetuned-bert" and have a local clone in the + # Push the {object} to an organization with the name "my-finetuned-bert" and have a local clone in the # `my-finetuned-bert` folder. - model.push_to_hub("my-finetuned-bert", organization="huggingface") + {object}.push_to_hub("my-finetuned-bert", organization="huggingface") # Make a change to an existing repo that has been cloned locally in `my-finetuned-bert`. - model.push_to_hub("my-finetuned-bert", repo_url="https://huggingface.co/sgugger/my-finetuned-bert") + {object}.push_to_hub("my-finetuned-bert", repo_url="https://huggingface.co/sgugger/my-finetuned-bert") """ if use_temp_dir: # Make sure we use the right `repo_name` for the `repo_url` before replacing it. diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 6c4a30a35c..7507a0831e 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -490,6 +490,13 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin): logger.info(f"Model pushed to the hub in this commit: {url}") +# To update the docstring, we need to copy the method, otherwise we change the original docstring. +FlaxPreTrainedModel.push_to_hub = copy_func(FlaxPreTrainedModel.push_to_hub) +FlaxPreTrainedModel.push_to_hub.__doc__ = FlaxPreTrainedModel.push_to_hub.__doc__.format( + object="model", object_class="FlaxAutoModel", object_files="model checkpoint" +) + + def overwrite_call_docstring(model_class, docstring): # copy __call__ function to be sure docstring is changed only for this function model_class.__call__ = copy_func(model_class.__call__) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index da6c6c32d5..79d3211d4a 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -36,6 +36,7 @@ from .file_utils import ( ModelOutput, PushToHubMixin, cached_path, + copy_func, hf_bucket_url, is_offline_mode, is_remote_url, @@ -1392,6 +1393,13 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu return model +# To update the docstring, we need to copy the method, otherwise we change the original docstring. +TFPreTrainedModel.push_to_hub = copy_func(TFPreTrainedModel.push_to_hub) +TFPreTrainedModel.push_to_hub.__doc__ = TFPreTrainedModel.push_to_hub.__doc__.format( + object="model", object_class="TFAutoModel", object_files="model checkpoint" +) + + class TFConv1D(tf.keras.layers.Layer): """ 1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2). diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 815c242f3e..b34637b021 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -38,6 +38,7 @@ from .file_utils import ( ModelOutput, PushToHubMixin, cached_path, + copy_func, hf_bucket_url, is_offline_mode, is_remote_url, @@ -1555,6 +1556,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix return retrieved_modules +# To update the docstring, we need to copy the method, otherwise we change the original docstring. +PreTrainedModel.push_to_hub = copy_func(PreTrainedModel.push_to_hub) +PreTrainedModel.push_to_hub.__doc__ = PreTrainedModel.push_to_hub.__doc__.format( + object="model", object_class="AutoModel", object_files="model checkpoint" +) + + class Conv1D(nn.Module): """ 1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2). diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 5a2cf575ce..b5b9ffa86e 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -43,6 +43,7 @@ from .file_utils import ( _is_torch_device, add_end_docstrings, cached_path, + copy_func, hf_bucket_url, is_flax_available, is_offline_mode, @@ -3371,3 +3372,10 @@ For a more complete example, see the implementation of `prepare_seq2seq_batch`. ) model_inputs["labels"] = labels["input_ids"] return model_inputs + + +# To update the docstring, we need to copy the method, otherwise we change the original docstring. +PreTrainedTokenizerBase.push_to_hub = copy_func(PreTrainedTokenizerBase.push_to_hub) +PreTrainedTokenizerBase.push_to_hub.__doc__ = PreTrainedTokenizerBase.push_to_hub.__doc__.format( + object="tokenizer", object_class="AutoTokenizer", object_files="tokenizer files" +)