Docs: add kwargs type to fix formatting (#24733)
This commit is contained in:
@@ -432,7 +432,7 @@ class PretrainedConfig(PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
if os.path.isfile(save_directory):
|
||||
|
||||
@@ -379,7 +379,7 @@ class FeatureExtractionMixin(PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
if os.path.isfile(save_directory):
|
||||
|
||||
@@ -353,7 +353,7 @@ class GenerationConfig(PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
config_file_name = config_file_name if config_file_name is not None else GENERATION_CONFIG_NAME
|
||||
|
||||
@@ -38,7 +38,7 @@ LOGITS_PROCESSOR_INPUTS_DOCSTRING = r"""
|
||||
scores (`jnp.ndarray` of shape `(batch_size, config.vocab_size)`):
|
||||
Prediction scores of a language modeling head. These can be logits for each vocabulary when not using beam
|
||||
search or log softmax for each vocabulary token when using beam search.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional logits processor specific kwargs.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -296,7 +296,7 @@ class FlaxGenerationMixin:
|
||||
Custom logits processors that complement the default logits processors built from arguments and
|
||||
generation config. If a logit processor is passed that is already created with the arguments or a
|
||||
generation config an error is thrown. This feature is intended for advanced users.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -39,7 +39,7 @@ LOGITS_PROCESSOR_INPUTS_DOCSTRING = r"""
|
||||
scores (`torch.FloatTensor` of shape `(batch_size, config.vocab_size)`):
|
||||
Prediction scores of a language modeling head. These can be logits for each vocabulary when not using beam
|
||||
search or log softmax for each vocabulary token when using beam search.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional logits processor specific kwargs.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -24,7 +24,7 @@ STOPPING_CRITERIA_INPUTS_DOCSTRING = r"""
|
||||
scores (`torch.FloatTensor` of shape `(batch_size, config.vocab_size)`):
|
||||
Prediction scores of a language modeling head. These can be scores for each vocabulary token before SoftMax
|
||||
or scores for each vocabulary token after SoftMax.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional stopping criteria specific kwargs.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -42,7 +42,7 @@ TF_LOGITS_PROCESSOR_INPUTS_DOCSTRING = r"""
|
||||
cur_len (`int`):
|
||||
The current length of valid input sequence tokens. In the TF implementation, the input_ids' sequence length
|
||||
is the maximum length generate can produce, and we need to know which of its tokens are valid.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional logits processor specific kwargs.
|
||||
|
||||
Return:
|
||||
|
||||
@@ -705,7 +705,7 @@ class TFGenerationMixin:
|
||||
seed (`List[int]`, *optional*):
|
||||
Random seed to control sampling, containing two integers, used when `do_sample` is `True`. See the
|
||||
`seed` argument from stateless functions in `tf.random`.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -1225,7 +1225,7 @@ class GenerationMixin:
|
||||
streamer (`BaseStreamer`, *optional*):
|
||||
Streamer object that will be used to stream the generated sequences. Generated tokens are passed
|
||||
through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -122,8 +122,8 @@ class HfArgumentParser(ArgumentParser):
|
||||
Args:
|
||||
dataclass_types:
|
||||
Dataclass type, or list of dataclass types for which we will "fill" instances with the parsed args.
|
||||
kwargs:
|
||||
(Optional) Passed to `argparse.ArgumentParser()` in the regular way.
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Passed to `argparse.ArgumentParser()` in the regular way.
|
||||
"""
|
||||
# To make the default appear when using --help
|
||||
if "formatter_class" not in kwargs:
|
||||
|
||||
@@ -208,7 +208,7 @@ class ImageProcessingMixin(PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
if os.path.isfile(save_directory):
|
||||
|
||||
@@ -1043,7 +1043,7 @@ class FlaxPreTrainedModel(PushToHubMixin, FlaxGenerationMixin):
|
||||
|
||||
</Tip>
|
||||
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
if os.path.isfile(save_directory):
|
||||
|
||||
@@ -2371,8 +2371,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
|
||||
Whether or not to create a PR with the uploaded files or directly commit.
|
||||
safe_serialization (`bool`, *optional*, defaults to `False`):
|
||||
Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
|
||||
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
if os.path.isfile(save_directory):
|
||||
@@ -3166,7 +3165,7 @@ class TFConv1D(tf.keras.layers.Layer):
|
||||
The number of input features.
|
||||
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||
The standard deviation to use to initialize the weights.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the `__init__` of `tf.keras.layers.Layer`.
|
||||
"""
|
||||
|
||||
@@ -3208,7 +3207,7 @@ class TFSharedEmbeddings(tf.keras.layers.Layer):
|
||||
initializer_range (`float`, *optional*):
|
||||
The standard deviation to use when initializing the weights. If no value is provided, it will default to
|
||||
\\(1/\sqrt{hidden\_size}\\).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the `__init__` of `tf.keras.layers.Layer`.
|
||||
"""
|
||||
# TODO (joao): flagged for deletion due to embeddings refactor
|
||||
@@ -3322,7 +3321,7 @@ class TFSequenceSummary(tf.keras.layers.Layer):
|
||||
- **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.
|
||||
|
||||
initializer_range (`float`, defaults to 0.02): The standard deviation to use to initialize the weights.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the `__init__` of `tf.keras.layers.Layer`.
|
||||
"""
|
||||
|
||||
|
||||
@@ -1700,8 +1700,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
|
||||
Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
|
||||
variant (`str`, *optional*):
|
||||
If specified, weights are saved in the format pytorch_model.<variant>.bin.
|
||||
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
# Checks if the model has been loaded in 8-bit
|
||||
|
||||
@@ -202,9 +202,6 @@ class JukeboxTokenizer(PreTrainedTokenizer):
|
||||
"""
|
||||
Performs any necessary transformations before tokenization.
|
||||
|
||||
This method should pop the arguments from kwargs and return the remaining `kwargs` as well. We test the
|
||||
`kwargs` at the end of the encoding process to be sure all the arguments have been used.
|
||||
|
||||
Args:
|
||||
artist (`str`):
|
||||
The artist name to prepare. This will mostly lower the string
|
||||
@@ -216,8 +213,6 @@ class JukeboxTokenizer(PreTrainedTokenizer):
|
||||
Whether or not the input is already pre-tokenized (e.g., split into words). If set to `True`, the
|
||||
tokenizer assumes the input is already split into words (for instance, by splitting it on whitespace)
|
||||
which it will tokenize. This is useful for NER or token classification.
|
||||
kwargs:
|
||||
Keyword arguments to use for the tokenization.
|
||||
"""
|
||||
for idx in range(len(self.version)):
|
||||
if self.version[idx] == "v3":
|
||||
|
||||
@@ -1228,7 +1228,7 @@ class MusicgenForCausalLM(MusicgenPreTrainedModel):
|
||||
generation config an error is thrown. This feature is intended for advanced users.
|
||||
synced_gpus (`bool`, *optional*, defaults to `False`):
|
||||
Whether to continue running the while loop until max_length (needed for ZeRO stage 3).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
@@ -2225,7 +2225,7 @@ class MusicgenForConditionalGeneration(PreTrainedModel):
|
||||
generation config an error is thrown. This feature is intended for advanced users.
|
||||
synced_gpus (`bool`, *optional*, defaults to `False`):
|
||||
Whether to continue running the while loop until max_length (needed for ZeRO stage 3).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -962,7 +962,7 @@ class RagSequenceForGeneration(RagPreTrainedModel):
|
||||
Number of beams for beam search. 1 means no beam search.
|
||||
n_docs (`int`, *optional*, defaults to `config.n_docs`):
|
||||
Number of documents to retrieve and/or number of documents for which to generate an answer.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional kwargs will be passed to [`~generation.GenerationMixin.generate`].
|
||||
|
||||
Return:
|
||||
@@ -1444,7 +1444,7 @@ class RagTokenForGeneration(RagPreTrainedModel):
|
||||
Custom stopping criteria that complement the default stopping criteria built from arguments and a
|
||||
model's config. If a stopping criteria is passed that is already created with the arguments or a
|
||||
model's config an error is thrown.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model.
|
||||
|
||||
|
||||
@@ -1051,7 +1051,7 @@ class TFRagTokenForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingLoss
|
||||
Custom logits processors that complement the default logits processors built from arguments and a
|
||||
model's config. If a logit processor is passed that is already created with the arguments or a model's
|
||||
config an error is thrown.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model.
|
||||
|
||||
@@ -1629,7 +1629,7 @@ class TFRagSequenceForGeneration(TFRagPreTrainedModel, TFCausalLanguageModelingL
|
||||
Number of beams for beam search. 1 means no beam search.
|
||||
n_docs (`int`, *optional*, defaults to `config.n_docs`):
|
||||
Number of documents to retrieve and/or number of documents for which to generate an answer.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional kwargs will be passed to [`~generation.GenerationMixin.generate`].
|
||||
|
||||
Return:
|
||||
|
||||
@@ -1394,7 +1394,7 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
|
||||
Whether to return token-level timestamps with the text. This can be used with or without the
|
||||
`return_timestamps` option. To get word-level timestamps, use the tokenizer to group the tokens into
|
||||
words.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -1608,7 +1608,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
|
||||
Whether to return token-level timestamps with the text. This can be used with or without the
|
||||
`return_timestamps` option. To get word-level timestamps, use the tokenizer to group the tokens into
|
||||
words.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Ad hoc parametrization of `generation_config` and/or additional model-specific kwargs that will be
|
||||
forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
|
||||
specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
|
||||
|
||||
@@ -201,7 +201,7 @@ class AdamWeightDecay(Adam):
|
||||
`include_in_weight_decay` is passed, the names in it will supersede this list.
|
||||
name (`str`, *optional*, defaults to 'AdamWeightDecay'):
|
||||
Optional name for the operations created when applying gradients.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, `decay`}. `clipnorm` is clip gradients by
|
||||
norm; `clipvalue` is clip gradients by value, `decay` is included for backward compatibility to allow time
|
||||
inverse decay of learning rate. `lr` is included for backward compatibility, recommended to use
|
||||
|
||||
@@ -634,10 +634,10 @@ def pipeline(
|
||||
Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
|
||||
tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
|
||||
and in which you have read the code, as it will execute code present on the Hub on your local machine.
|
||||
model_kwargs:
|
||||
model_kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
|
||||
**model_kwargs)` function.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
|
||||
corresponding pipeline class for possible values).
|
||||
|
||||
|
||||
@@ -111,7 +111,7 @@ class ProcessorMixin(PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
"""
|
||||
os.makedirs(save_directory, exist_ok=True)
|
||||
|
||||
@@ -834,7 +834,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
|
||||
Whether or not the input is already pre-tokenized (e.g., split into words). If set to `True`, the
|
||||
tokenizer assumes the input is already split into words (for instance, by splitting it on whitespace)
|
||||
which it will tokenize. This is useful for NER or token classification.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Keyword arguments to use for the tokenization.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -2133,7 +2133,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
||||
Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
|
||||
repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
|
||||
namespace).
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -630,7 +630,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
|
||||
special_tokens_map (`Dict[str, str]`, *optional*):
|
||||
If you want to rename some of the special tokens this tokenizer uses, pass along a mapping old special
|
||||
token name to new special token name in this argument.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to the trainer from the 🤗 Tokenizers library.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -704,7 +704,7 @@ class LocalAgent(Agent):
|
||||
Args:
|
||||
pretrained_model_name_or_path (`str` or `os.PathLike`):
|
||||
The name of a repo on the Hub or a local path to a folder containing both model and tokenizer.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Keyword arguments passed along to [`~PreTrainedModel.from_pretrained`].
|
||||
|
||||
Example:
|
||||
|
||||
@@ -1475,7 +1475,7 @@ class Trainer:
|
||||
ignore_keys_for_eval (`List[str]`, *optional*):
|
||||
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
|
||||
gathering predictions for evaluation during the training.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments used to hide deprecated arguments.
|
||||
"""
|
||||
if resume_from_checkpoint is False:
|
||||
@@ -3567,7 +3567,7 @@ class Trainer:
|
||||
Message to commit while pushing.
|
||||
blocking (`bool`, *optional*, defaults to `True`):
|
||||
Whether the function should return only when the `git push` has finished.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments passed along to [`~Trainer.create_model_card`].
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -257,7 +257,7 @@ class DistributedSamplerWithLoop(DistributedSampler):
|
||||
Dataset used for sampling.
|
||||
batch_size (`int`):
|
||||
The batch size used with this sampler.
|
||||
kwargs:
|
||||
kwargs (`Dict[str, Any]`, *optional*):
|
||||
All other keyword arguments passed to `DistributedSampler`.
|
||||
"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user