From 541bed22d6e4f97946a3a7d74f7e1a353e58643b Mon Sep 17 00:00:00 2001 From: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com> Date: Fri, 18 Jul 2025 14:00:34 -0400 Subject: [PATCH] Improve @auto_docstring doc and rename `args_doc.py` to `auto_docstring.py` (#39439) * rename `args_doc.py` to `auto_docstring.py` and improve doc * modifs after review --- docs/source/en/auto_docstring.md | 41 ++++- src/transformers/utils/__init__.py | 2 +- .../utils/{args_doc.py => auto_docstring.py} | 161 +++++++++++++++--- utils/check_docstrings.py | 4 +- 4 files changed, 179 insertions(+), 29 deletions(-) rename src/transformers/utils/{args_doc.py => auto_docstring.py} (92%) diff --git a/docs/source/en/auto_docstring.md b/docs/source/en/auto_docstring.md index 298a501dbf..0938d89ee4 100644 --- a/docs/source/en/auto_docstring.md +++ b/docs/source/en/auto_docstring.md @@ -64,9 +64,9 @@ Arguments can also be passed directly to `@auto_docstring` for more control. Use It builds upon the standard Transformer architecture with unique modifications.""", custom_args=""" custom_parameter (`type`, *optional*, defaults to `default_value`): - A concise description for custom_parameter if not defined or overriding the description in `args_doc.py`. + A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`. internal_helper_arg (`type`, *optional*, defaults to `default_value`): - A concise description for internal_helper_arg if not defined or overriding the description in `args_doc.py`. + A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`. 
""" ) class MySpecialModel(PreTrainedModel): @@ -85,13 +85,40 @@ class MySpecialModel(PreTrainedModel): def __init__(self, config: ConfigType, custom_parameter: "type" = "default_value", internal_helper_arg=None): r""" custom_parameter (`type`, *optional*, defaults to `default_value`): - A concise description for custom_parameter if not defined or overriding the description in `args_doc.py`. + A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`. internal_helper_arg (`type`, *optional*, defaults to `default_value`): - A concise description for internal_helper_arg if not defined or overriding the description in `args_doc.py`. + A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`. """ # ... ``` +You should also use the `@auto_docstring` decorator for classes that inherit from [`~utils.ModelOutput`]. + +```python +@dataclass +@auto_docstring( + custom_intro=""" + Custom model outputs with additional fields. + """ +) +class MyModelOutput(ImageClassifierOutput): + r""" + loss (`torch.FloatTensor`, *optional*): + The loss of the model. + custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*): + A custom output field specific to this model. + """ + + # Standard fields like hidden_states, logits, attentions etc. can be automatically documented if the description is the same as the standard arguments. + # However, given that the loss docstring is often different per model, you should document it in the docstring above. 
+ loss: Optional[torch.FloatTensor] = None + logits: Optional[torch.FloatTensor] = None + hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None + attentions: Optional[tuple[torch.FloatTensor, ...]] = None + # Custom fields need to be documented in the docstring above + custom_field: Optional[torch.FloatTensor] = None +``` + @@ -171,7 +198,7 @@ class MyModel(PreTrainedModel): There are some rules for documenting different types of arguments and they're listed below. -- Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are defined and retrieved from `args_doc.py`. It is the single source of truth for standard arguments and should not be redefined locally if an argument's description and shape is the same as an argument in `args_doc.py`. +- Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are defined and retrieved from `auto_docstring.py`. It is the single source of truth for standard arguments and should not be redefined locally if an argument's description and shape is the same as an argument in `auto_docstring.py`. If a standard argument behaves differently in your model, then you can override it locally in a `r""" """` block. This local definition has a higher priority. For example, the `labels` argument is often customized per model and typically requires overriding. @@ -245,7 +272,7 @@ When working with modular files (`modular_model.py`), follow the guidelines belo The `@auto_docstring` decorator automatically generates docstrings by: 1. Inspecting the signature (arguments, types, defaults) of the decorated class' `__init__` method or the decorated function. -2. Retrieving the predefined docstrings for common arguments (`input_ids`, `attention_mask`, etc.) from internal library sources like [`ModelArgs`], [`ImageProcessorArgs`], and the `args_doc.py` file. +2. Retrieving the predefined docstrings for common arguments (`input_ids`, `attention_mask`, etc.) 
from internal library sources like [`ModelArgs`], [`ImageProcessorArgs`], and the `auto_docstring.py` file. 3. Adding argument descriptions in one of two ways as shown below. | method | description | usage | @@ -253,7 +280,7 @@ The `@auto_docstring` decorator automatically generates docstrings by: | `r""" """` | add custom docstring content directly to a method signature or within the `__init__` docstring | document new arguments or override standard descriptions | | `custom_args` | add custom docstrings for specific arguments directly in `@auto_docstring` | define docstring for new arguments once if they're repeated in multiple places in the modeling file | -4. Adding class and function descriptions. For model classes with standard naming patterns, like `ModelForCausalLM`, or if it belongs to a pipeline, `@auto_docstring` automatically generates the appropriate descriptions with `ClassDocstring` from `args_doc.py`. +4. Adding class and function descriptions. For model classes with standard naming patterns, like `ModelForCausalLM`, or if it belongs to a pipeline, `@auto_docstring` automatically generates the appropriate descriptions with `ClassDocstring` from `auto_docstring.py`. `@auto_docstring` also accepts the `custom_intro` argument to describe a class or function. diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 9c1132ec5c..1e212b5fa4 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -21,7 +21,7 @@ from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEME from packaging import version from .. 
import __version__ -from .args_doc import ( +from .auto_docstring import ( ClassAttrs, ClassDocstring, ImageProcessorArgs, diff --git a/src/transformers/utils/args_doc.py b/src/transformers/utils/auto_docstring.py similarity index 92% rename from src/transformers/utils/args_doc.py rename to src/transformers/utils/auto_docstring.py index 3b521a27c5..c0f154baae 100644 --- a/src/transformers/utils/args_doc.py +++ b/src/transformers/utils/auto_docstring.py @@ -1271,7 +1271,7 @@ def _get_model_info(func, parent_class): else: config_class = "ModelConfig" print( - f"🚨 Config not found for {model_name_lowercase}. You can manually add it to HARDCODED_CONFIG_FOR_MODELS in utils/args_doc.py" + f"🚨 Config not found for {model_name_lowercase}. You can manually add it to HARDCODED_CONFIG_FOR_MODELS in utils/auto_docstring.py" ) return model_name_lowercase, class_name, config_class @@ -1893,27 +1893,150 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No def auto_docstring(obj=None, *, custom_intro=None, custom_args=None, checkpoint=None): - """ - Automatically generates docstrings for classes and methods in the Transformers library. + r""" + Automatically generates comprehensive docstrings for model classes and methods in the Transformers library. - This decorator can be used in the following forms: - @auto_docstring - def my_function(...): - ... - or - @auto_docstring() - def my_function(...): - ... - or - @auto_docstring(custom_intro="Custom intro", ...) - def my_function(...): - ... + This decorator reduces boilerplate by automatically including standard argument descriptions while allowing + overrides to add new or custom arguments. It inspects function signatures, retrieves predefined docstrings + for common arguments (like `input_ids`, `attention_mask`, etc.), and generates complete documentation + including examples and return value descriptions. 
+ + For complete documentation and examples, read this [guide](https://huggingface.co/docs/transformers/auto_docstring). + + Examples of usage: + + Basic usage (no parameters): + ```python + @auto_docstring + class MyAwesomeModel(PreTrainedModel): + def __init__(self, config, custom_parameter: int = 10): + r''' + custom_parameter (`int`, *optional*, defaults to 10): + Description of the custom parameter for MyAwesomeModel. + ''' + super().__init__(config) + self.custom_parameter = custom_parameter + ``` + + Using `custom_intro` with a class: + ```python + @auto_docstring( + custom_intro="This model implements a novel attention mechanism for improved performance." + ) + class MySpecialModel(PreTrainedModel): + def __init__(self, config, attention_type: str = "standard"): + r''' + attention_type (`str`, *optional*, defaults to "standard"): + Type of attention mechanism to use. + ''' + super().__init__(config) + ``` + + Using `custom_intro` with a method, and specifying custom arguments and an example directly in the docstring: + ```python + @auto_docstring( + custom_intro="Performs forward pass with enhanced attention computation." + ) + def forward( + self, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + ): + r''' + custom_parameter (`int`, *optional*, defaults to 10): + Description of the custom parameter for MyAwesomeModel. + + Example: + + ```python + >>> model = MyAwesomeModel(config) + >>> model.forward(input_ids=torch.tensor([1, 2, 3]), attention_mask=torch.tensor([1, 1, 1])) + ``` + ''' + ``` + + Using `custom_args` to define reusable arguments: + ```python + VISION_ARGS = r''' + pixel_values (`torch.FloatTensor`, *optional*): + Pixel values of the input images. + image_features (`torch.FloatTensor`, *optional*): + Pre-computed image features for efficient processing. + ''' + + @auto_docstring(custom_args=VISION_ARGS) + def encode_images(self, pixel_values=None, image_features=None): + # ... 
method implementation + ``` + + Combining `custom_intro` and `custom_args`: + ```python + MULTIMODAL_ARGS = r''' + vision_features (`torch.FloatTensor`, *optional*): + Pre-extracted vision features from the vision encoder. + fusion_strategy (`str`, *optional*, defaults to "concat"): + Strategy for fusing text and vision modalities. + ''' + + @auto_docstring( + custom_intro="Processes multimodal inputs combining text and vision.", + custom_args=MULTIMODAL_ARGS + ) + def forward( + self, + input_ids, + attention_mask=None, + vision_features=None, + fusion_strategy="concat" + ): + # ... multimodal processing + ``` + + Using with ModelOutput classes: + ```python + @dataclass + @auto_docstring( + custom_intro="Custom model outputs with additional fields." + ) + class MyModelOutput(ImageClassifierOutput): + r''' + loss (`torch.FloatTensor`, *optional*): + The loss of the model. + custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*): + A custom output field specific to this model. + ''' + + # Standard fields like hidden_states, logits, attentions etc. can be automatically documented + # However, given that the loss docstring is often different per model, you should document it above + loss: Optional[torch.FloatTensor] = None + logits: Optional[torch.FloatTensor] = None + hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None + attentions: Optional[tuple[torch.FloatTensor, ...]] = None + custom_field: Optional[torch.FloatTensor] = None + ``` Args: - custom_intro (str, optional): Custom introduction text to add to the docstring. This will replace the default - introduction text generated by the decorator before the Args section. - checkpoint (str, optional): Checkpoint name to use in the docstring. This should be automatically inferred from the - model configuration class, but can be overridden if needed. + custom_intro (`str`, *optional*): + Custom introduction text to add to the docstring. 
This replaces the default + introduction text generated by the decorator before the Args section. Use this to describe what + makes your model or method special. + custom_args (`str`, *optional*): + Custom argument documentation in docstring format. This allows you to define + argument descriptions once and reuse them across multiple methods. The format should follow the + standard docstring convention: ``arg_name (`type`, *optional*, defaults to `value`): Description.`` + checkpoint (`str`, *optional*): + Checkpoint name to use in examples within the docstring. This is typically + automatically inferred from the model configuration class, but can be overridden if needed for + custom examples. + + Note: + - Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are automatically documented + from predefined descriptions and should not be redefined unless their behavior differs in your model. + - New or custom arguments should be documented in the method's docstring using the `r''' '''` block + or passed via the `custom_args` parameter. + - For model classes, the decorator derives parameter descriptions from the `__init__` method's signature + and docstring. + - Return value documentation is automatically generated for methods that return ModelOutput subclasses. 
""" def auto_docstring_decorator(obj): diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py index 946f678fb3..cd5f495355 100644 --- a/utils/check_docstrings.py +++ b/utils/check_docstrings.py @@ -49,7 +49,7 @@ from check_repo import ignore_undocumented from git import Repo from transformers.utils import direct_transformers_import -from transformers.utils.args_doc import ( +from transformers.utils.auto_docstring import ( ImageProcessorArgs, ModelArgs, ModelOutputArgs, @@ -1487,7 +1487,7 @@ def check_auto_docstrings(overwrite: bool = False, check_all: bool = False): if docstring_args_ro_remove_warnings: if not overwrite: print( - "Some docstrings are redundant with the ones in `args_doc.py` and will be removed. Run `make fix-copies` or `python utils/check_docstrings.py --fix_and_overwrite` to remove the redundant docstrings." + "Some docstrings are redundant with the ones in `auto_docstring.py` and will be removed. Run `make fix-copies` or `python utils/check_docstrings.py --fix_and_overwrite` to remove the redundant docstrings." ) print(f"🚨 Redundant docstring for the following arguments in {candidate_file}:") for warning in docstring_args_ro_remove_warnings: