Improve @auto_docstring doc and rename args_doc.py to auto_docstring.py (#39439)

* rename `args_doc.py` to `auto_docstring.py` and improve doc

* modifs after review
This commit is contained in:
Yoni Gozlan
2025-07-18 14:00:34 -04:00
committed by GitHub
parent de0dd3139d
commit 541bed22d6
4 changed files with 179 additions and 29 deletions

View File

@@ -64,9 +64,9 @@ Arguments can also be passed directly to `@auto_docstring` for more control. Use
It builds upon the standard Transformer architecture with unique modifications.""", It builds upon the standard Transformer architecture with unique modifications.""",
custom_args=""" custom_args="""
custom_parameter (`type`, *optional*, defaults to `default_value`): custom_parameter (`type`, *optional*, defaults to `default_value`):
A concise description for custom_parameter if not defined or overriding the description in `args_doc.py`. A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`.
internal_helper_arg (`type`, *optional*, defaults to `default_value`): internal_helper_arg (`type`, *optional*, defaults to `default_value`):
A concise description for internal_helper_arg if not defined or overriding the description in `args_doc.py`. A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`.
""" """
) )
class MySpecialModel(PreTrainedModel): class MySpecialModel(PreTrainedModel):
@@ -85,13 +85,40 @@ class MySpecialModel(PreTrainedModel):
def __init__(self, config: ConfigType, custom_parameter: "type" = "default_value", internal_helper_arg=None): def __init__(self, config: ConfigType, custom_parameter: "type" = "default_value", internal_helper_arg=None):
r""" r"""
custom_parameter (`type`, *optional*, defaults to `default_value`): custom_parameter (`type`, *optional*, defaults to `default_value`):
A concise description for custom_parameter if not defined or overriding the description in `args_doc.py`. A concise description for custom_parameter if not defined or overriding the description in `auto_docstring.py`.
internal_helper_arg (`type`, *optional*, defaults to `default_value`): internal_helper_arg (`type`, *optional*, defaults to `default_value`):
A concise description for internal_helper_arg if not defined or overriding the description in `args_doc.py`. A concise description for internal_helper_arg if not defined or overriding the description in `auto_docstring.py`.
""" """
# ... # ...
``` ```
You should also use the `@auto_docstring` decorator for classes that inherit from [`~utils.ModelOutput`].
```python
@dataclass
@auto_docstring(
custom_intro="""
Custom model outputs with additional fields.
"""
)
class MyModelOutput(ImageClassifierOutput):
r"""
loss (`torch.FloatTensor`, *optional*):
The loss of the model.
custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*):
A custom output field specific to this model.
"""
# Standard fields like hidden_states, logits, attentions etc. can be automatically documented if the description is the same as the standard arguments.
# However, given that the loss docstring is often different per model, you should document it in the docstring above.
loss: Optional[torch.FloatTensor] = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
attentions: Optional[tuple[torch.FloatTensor, ...]] = None
# Custom fields need to be documented in the docstring above
custom_field: Optional[torch.FloatTensor] = None
```
</hfoption> </hfoption>
<hfoption id="functions"> <hfoption id="functions">
@@ -171,7 +198,7 @@ class MyModel(PreTrainedModel):
There are some rules for documenting different types of arguments and they're listed below. There are some rules for documenting different types of arguments and they're listed below.
- Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are defined and retrieved from `args_doc.py`. It is the single source of truth for standard arguments and should not be redefined locally if an argument's description and shape is the same as an argument in `args_doc.py`. - Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are defined and retrieved from `auto_docstring.py`. It is the single source of truth for standard arguments and should not be redefined locally if an argument's description and shape is the same as an argument in `auto_docstring.py`.
If a standard argument behaves differently in your model, then you can override it locally in a `r""" """` block. This local definition has a higher priority. For example, the `labels` argument is often customized per model and typically requires overriding. If a standard argument behaves differently in your model, then you can override it locally in a `r""" """` block. This local definition has a higher priority. For example, the `labels` argument is often customized per model and typically requires overriding.
@@ -245,7 +272,7 @@ When working with modular files (`modular_model.py`), follow the guidelines belo
The `@auto_docstring` decorator automatically generates docstrings by: The `@auto_docstring` decorator automatically generates docstrings by:
1. Inspecting the signature (arguments, types, defaults) of the decorated class' `__init__` method or the decorated function. 1. Inspecting the signature (arguments, types, defaults) of the decorated class' `__init__` method or the decorated function.
2. Retrieving the predefined docstrings for common arguments (`input_ids`, `attention_mask`, etc.) from internal library sources like [`ModelArgs`], [`ImageProcessorArgs`], and the `args_doc.py` file. 2. Retrieving the predefined docstrings for common arguments (`input_ids`, `attention_mask`, etc.) from internal library sources like [`ModelArgs`], [`ImageProcessorArgs`], and the `auto_docstring.py` file.
3. Adding argument descriptions in one of two ways as shown below. 3. Adding argument descriptions in one of two ways as shown below.
| method | description | usage | | method | description | usage |
@@ -253,7 +280,7 @@ The `@auto_docstring` decorator automatically generates docstrings by:
| `r""" """` | add custom docstring content directly to a method signature or within the `__init__` docstring | document new arguments or override standard descriptions | | `r""" """` | add custom docstring content directly to a method signature or within the `__init__` docstring | document new arguments or override standard descriptions |
| `custom_args` | add custom docstrings for specific arguments directly in `@auto_docstring` | define docstring for new arguments once if they're repeated in multiple places in the modeling file | | `custom_args` | add custom docstrings for specific arguments directly in `@auto_docstring` | define docstring for new arguments once if they're repeated in multiple places in the modeling file |
4. Adding class and function descriptions. For model classes with standard naming patterns, like `ModelForCausalLM`, or if it belongs to a pipeline, `@auto_docstring` automatically generates the appropriate descriptions with `ClassDocstring` from `args_doc.py`. 4. Adding class and function descriptions. For model classes with standard naming patterns, like `ModelForCausalLM`, or if it belongs to a pipeline, `@auto_docstring` automatically generates the appropriate descriptions with `ClassDocstring` from `auto_docstring.py`.
`@auto_docstring` also accepts the `custom_intro` argument to describe a class or function. `@auto_docstring` also accepts the `custom_intro` argument to describe a class or function.

View File

@@ -21,7 +21,7 @@ from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEME
from packaging import version from packaging import version
from .. import __version__ from .. import __version__
from .args_doc import ( from .auto_docstring import (
ClassAttrs, ClassAttrs,
ClassDocstring, ClassDocstring,
ImageProcessorArgs, ImageProcessorArgs,

View File

@@ -1271,7 +1271,7 @@ def _get_model_info(func, parent_class):
else: else:
config_class = "ModelConfig" config_class = "ModelConfig"
print( print(
f"🚨 Config not found for {model_name_lowercase}. You can manually add it to HARDCODED_CONFIG_FOR_MODELS in utils/args_doc.py" f"🚨 Config not found for {model_name_lowercase}. You can manually add it to HARDCODED_CONFIG_FOR_MODELS in utils/auto_docstring.py"
) )
return model_name_lowercase, class_name, config_class return model_name_lowercase, class_name, config_class
@@ -1893,27 +1893,150 @@ def auto_class_docstring(cls, custom_intro=None, custom_args=None, checkpoint=No
def auto_docstring(obj=None, *, custom_intro=None, custom_args=None, checkpoint=None): def auto_docstring(obj=None, *, custom_intro=None, custom_args=None, checkpoint=None):
""" r"""
Automatically generates docstrings for classes and methods in the Transformers library. Automatically generates comprehensive docstrings for model classes and methods in the Transformers library.
This decorator can be used in the following forms: This decorator reduces boilerplate by automatically including standard argument descriptions while allowing
@auto_docstring overrides to add new or custom arguments. It inspects function signatures, retrieves predefined docstrings
def my_function(...): for common arguments (like `input_ids`, `attention_mask`, etc.), and generates complete documentation
... including examples and return value descriptions.
or
@auto_docstring() For complete documentation and examples, read this [guide](https://huggingface.co/docs/transformers/auto_docstring).
def my_function(...):
... Examples of usage:
or
@auto_docstring(custom_intro="Custom intro", ...) Basic usage (no parameters):
def my_function(...): ```python
... @auto_docstring
class MyAwesomeModel(PreTrainedModel):
def __init__(self, config, custom_parameter: int = 10):
r'''
custom_parameter (`int`, *optional*, defaults to 10):
Description of the custom parameter for MyAwesomeModel.
'''
super().__init__(config)
self.custom_parameter = custom_parameter
```
Using `custom_intro` with a class:
```python
@auto_docstring(
custom_intro="This model implements a novel attention mechanism for improved performance."
)
class MySpecialModel(PreTrainedModel):
def __init__(self, config, attention_type: str = "standard"):
r'''
attention_type (`str`, *optional*, defaults to "standard"):
Type of attention mechanism to use.
'''
super().__init__(config)
```
Using `custom_intro` with a method, and specifying custom arguments and an example directly in the docstring:
```python
@auto_docstring(
custom_intro="Performs forward pass with enhanced attention computation."
)
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
):
r'''
custom_parameter (`int`, *optional*, defaults to 10):
Description of the custom parameter for MyAwesomeModel.
Example:
```python
>>> model = MyAwesomeModel(config)
>>> model.forward(input_ids=torch.tensor([1, 2, 3]), attention_mask=torch.tensor([1, 1, 1]))
```
'''
```
Using `custom_args` to define reusable arguments:
```python
VISION_ARGS = r'''
pixel_values (`torch.FloatTensor`, *optional*):
Pixel values of the input images.
image_features (`torch.FloatTensor`, *optional*):
Pre-computed image features for efficient processing.
'''
@auto_docstring(custom_args=VISION_ARGS)
def encode_images(self, pixel_values=None, image_features=None):
# ... method implementation
```
Combining `custom_intro` and `custom_args`:
```python
MULTIMODAL_ARGS = r'''
vision_features (`torch.FloatTensor`, *optional*):
Pre-extracted vision features from the vision encoder.
fusion_strategy (`str`, *optional*, defaults to "concat"):
Strategy for fusing text and vision modalities.
'''
@auto_docstring(
custom_intro="Processes multimodal inputs combining text and vision.",
custom_args=MULTIMODAL_ARGS
)
def forward(
self,
input_ids,
attention_mask=None,
vision_features=None,
fusion_strategy="concat"
):
# ... multimodal processing
```
Using with ModelOutput classes:
```python
@dataclass
@auto_docstring(
custom_intro="Custom model outputs with additional fields."
)
class MyModelOutput(ImageClassifierOutput):
r'''
loss (`torch.FloatTensor`, *optional*):
The loss of the model.
custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*):
A custom output field specific to this model.
'''
# Standard fields like hidden_states, logits, attentions etc. can be automatically documented
# However, given that the loss docstring is often different per model, you should document it above
loss: Optional[torch.FloatTensor] = None
logits: Optional[torch.FloatTensor] = None
hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
attentions: Optional[tuple[torch.FloatTensor, ...]] = None
custom_field: Optional[torch.FloatTensor] = None
```
Args: Args:
custom_intro (str, optional): Custom introduction text to add to the docstring. This will replace the default custom_intro (`str`, *optional*):
introduction text generated by the decorator before the Args section. Custom introduction text to add to the docstring. This replaces the default
checkpoint (str, optional): Checkpoint name to use in the docstring. This should be automatically inferred from the introduction text generated by the decorator before the Args section. Use this to describe what
model configuration class, but can be overridden if needed. makes your model or method special.
custom_args (`str`, *optional*):
Custom argument documentation in docstring format. This allows you to define
argument descriptions once and reuse them across multiple methods. The format should follow the
standard docstring convention: ``arg_name (`type`, *optional*, defaults to `value`): Description.``
checkpoint (`str`, *optional*):
Checkpoint name to use in examples within the docstring. This is typically
automatically inferred from the model configuration class, but can be overridden if needed for
custom examples.
Note:
- Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are automatically documented
from predefined descriptions and should not be redefined unless their behavior differs in your model.
- New or custom arguments should be documented in the method's docstring using the `r''' '''` block
or passed via the `custom_args` parameter.
- For model classes, the decorator derives parameter descriptions from the `__init__` method's signature
and docstring.
- Return value documentation is automatically generated for methods that return ModelOutput subclasses.
""" """
def auto_docstring_decorator(obj): def auto_docstring_decorator(obj):

View File

@@ -49,7 +49,7 @@ from check_repo import ignore_undocumented
from git import Repo from git import Repo
from transformers.utils import direct_transformers_import from transformers.utils import direct_transformers_import
from transformers.utils.args_doc import ( from transformers.utils.auto_docstring import (
ImageProcessorArgs, ImageProcessorArgs,
ModelArgs, ModelArgs,
ModelOutputArgs, ModelOutputArgs,
@@ -1487,7 +1487,7 @@ def check_auto_docstrings(overwrite: bool = False, check_all: bool = False):
if docstring_args_ro_remove_warnings: if docstring_args_ro_remove_warnings:
if not overwrite: if not overwrite:
print( print(
"Some docstrings are redundant with the ones in `args_doc.py` and will be removed. Run `make fix-copies` or `python utils/check_docstrings.py --fix_and_overwrite` to remove the redundant docstrings." "Some docstrings are redundant with the ones in `auto_docstring.py` and will be removed. Run `make fix-copies` or `python utils/check_docstrings.py --fix_and_overwrite` to remove the redundant docstrings."
) )
print(f"🚨 Redundant docstring for the following arguments in {candidate_file}:") print(f"🚨 Redundant docstring for the following arguments in {candidate_file}:")
for warning in docstring_args_ro_remove_warnings: for warning in docstring_args_ro_remove_warnings: