From 692d33690855fadda046ca69ff012903d2bf9c3d Mon Sep 17 00:00:00 2001
From: Duc-Viet Hoang <vietyb00@gmail.com>
Date: Thu, 7 Aug 2025 23:33:29 +0700
Subject: [PATCH] Fix HGNetV2 Model Card and Image Classification Pipeline
 Usage Tips (#39965)

* fix hgnet docs and image-classification pipeline

* use positional argument

* fix dit close hfoptions tag

* fix alphabet order

* fix hgnet modular docstring

* Update hgnet_v2.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update hgnet_v2.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/hgnet_v2.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* fix: hgnet reference

* change hgnet to en doc

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
---
 docs/source/en/_toctree.yml                   | 24 +++----
 docs/source/en/model_doc/cvt.md               |  2 +-
 docs/source/en/model_doc/dit.md               |  3 +-
 docs/source/en/model_doc/hgnet_v2.md          | 63 ++++++++++++++++---
 docs/source/en/model_doc/mobilenet_v1.md      |  2 +-
 docs/source/en/model_doc/mobilenet_v2.md      |  2 +-
 docs/source/en/model_doc/swin.md              |  2 +-
 docs/source/en/model_doc/swinv2.md            |  2 +-
 docs/source/en/model_doc/vit.md               |  2 +-
 .../models/hgnet_v2/modeling_hgnet_v2.py      |  6 +-
 .../models/hgnet_v2/modular_hgnet_v2.py       |  6 +-
 11 files changed, 81 insertions(+), 33 deletions(-)
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 556b19f011..778d4255e6 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -90,16 +90,16 @@
       title: Tools and RAG
     title: Chat with models
   - sections:
-      - local: serving
-        title: Serving LLMs, VLMs, and other chat-based models
-      - local: jan
-        title: Jan
-      - local: cursor
-        title: Cursor
-      - local: tiny_agents
-        title: Tiny-Agents CLI and MCP tools
-      - local: open_webui
-        title: Open WebUI
+    - local: serving
+      title: Serving LLMs, VLMs, and other chat-based models
+    - local: jan
+      title: Jan
+    - local: cursor
+      title: Cursor
+    - local: tiny_agents
+      title: Tiny-Agents CLI and MCP tools
+    - local: open_webui
+      title: Open WebUI
     title: Serving
   - sections:
     - local: perf_torch_compile
@@ -529,8 +529,6 @@
         title: Helium
       - local: model_doc/herbert
         title: HerBERT
-      - local: model_doc/hgnet_v2
-        title: HGNet-V2
       - local: model_doc/ibert
         title: I-BERT
       - local: model_doc/jamba
@@ -781,6 +779,8 @@
         title: FocalNet
       - local: model_doc/glpn
         title: GLPN
+      - local: model_doc/hgnet_v2
+        title: HGNet-V2
       - local: model_doc/hiera
         title: Hiera
       - local: model_doc/ijepa
diff --git a/docs/source/en/model_doc/cvt.md b/docs/source/en/model_doc/cvt.md
index 3edbc9283b..3332e832c2 100644
--- a/docs/source/en/model_doc/cvt.md
+++ b/docs/source/en/model_doc/cvt.md
@@ -47,7 +47,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0 
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 
 </hfoption>
diff --git a/docs/source/en/model_doc/dit.md b/docs/source/en/model_doc/dit.md
index 494f1afa41..a48c8e9110 100644
--- a/docs/source/en/model_doc/dit.md
+++ b/docs/source/en/model_doc/dit.md
@@ -47,7 +47,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/dit-example.jpg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/dit-example.jpg")
 ```
 
 </hfoption>
@@ -81,6 +81,7 @@ print(f"The predicted class label is: {predicted_class_label}")
 ```
 
 </hfoption>
+</hfoptions>
 
 ## Notes
 
diff --git a/docs/source/en/model_doc/hgnet_v2.md b/docs/source/en/model_doc/hgnet_v2.md
index a2e594b5f9..d12a1712db 100644
--- a/docs/source/en/model_doc/hgnet_v2.md
+++ b/docs/source/en/model_doc/hgnet_v2.md
@@ -14,20 +14,67 @@ rendered properly in your Markdown viewer.
 
 -->
 
+<div style="float: right;">
+    <div class="flex flex-wrap space-x-1">
+        <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+    </div>
+</div>
+
 # HGNet-V2
 
-## Overview
+[HGNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/v2.6.0/docs/zh_CN/models/ImageNet1k/PP-HGNetV2.md) is a next-generation convolutional neural network (CNN) backbone built for optimal accuracy-latency tradeoff on NVIDIA GPUs. Building on the original [HGNet](https://github.com/PaddlePaddle/PaddleClas/blob/v2.6.0/docs/en/models/PP-HGNet_en.md), HGNetV2 delivers high accuracy at fast inference speeds and performs strongly on tasks like image classification, object detection, and segmentation, making it a practical choice for GPU-based computer vision applications.
 
-A HGNet-V2 (High Performance GPU Net) image classification model.
-HGNet arhtictecture was proposed in [HGNET: A Hierarchical Feature Guided Network for Occupancy Flow Field Prediction](https://huggingface.co/papers/2407.01097) by
-Zhan Chen, Chen Tang, Lu Xiong
+You can find all the original HGNet V2 models under the [USTC](https://huggingface.co/ustc-community/models?search=hgnet) organization.
 
-The abstract from the HGNET paper is the following:
+> [!TIP]
+> This model was contributed by [VladOS95-cyber](https://github.com/VladOS95-cyber).
+> Click on the HGNet V2 models in the right sidebar for more examples of how to apply HGNet V2 to different computer vision tasks.
 
-*Predicting the motion of multiple traffic participants has always been one of the most challenging tasks in autonomous driving. The recently proposed occupancy flow field prediction method has shown to be a more effective and scalable representation compared to general trajectory prediction methods. However, in complex multi-agent traffic scenarios, it remains difficult to model the interactions among various factors and the dependencies among prediction outputs at different time steps. In view of this, we propose a transformer-based hierarchical feature guided network (HGNET), which can efficiently extract features of agents and map information from visual and vectorized inputs, modeling multimodal interaction relationships. Second, we design the Feature-Guided Attention (FGAT) module to leverage the potential guiding effects between different prediction targets, thereby improving prediction accuracy. Additionally, to enhance the temporal consistency and causal relationships of the predictions, we propose a Time Series Memory framework to learn the conditional distribution models of the prediction outputs at future time steps from multivariate time series. The results demonstrate that our model exhibits competitive performance, which ranks 3rd in the 2024 Waymo Occupancy and Flow Prediction Challenge.*
+The example below demonstrates how to classify an image with [`Pipeline`] or the [`AutoModel`] class.
 
-This model was contributed by [VladOS95-cyber](https://github.com/VladOS95-cyber). 
-The original code can be found [here](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py).
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import torch
+from transformers import pipeline
+
+pipeline = pipeline(
+    task="image-classification",
+    model="ustc-community/hgnet-v2",
+    torch_dtype=torch.float16,
+    device=0
+)
+pipeline("http://images.cocodataset.org/val2017/000000039769.jpg")
+```
+
+</hfoption>
+<hfoption id="AutoModel">
+
+```py
+import torch
+import requests
+from transformers import HGNetV2ForImageClassification, AutoImageProcessor
+from PIL import Image
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
+
+model = HGNetV2ForImageClassification.from_pretrained("ustc-community/hgnet-v2")
+processor = AutoImageProcessor.from_pretrained("ustc-community/hgnet-v2")
+
+inputs = processor(images=image, return_tensors="pt")
+with torch.no_grad():
+    logits = model(**inputs).logits
+predicted_class_id = logits.argmax(dim=-1).item()
+
+class_labels = model.config.id2label
+predicted_class_label = class_labels[predicted_class_id]
+print(f"The predicted class label is: {predicted_class_label}")
+```
+
+</hfoption>
+</hfoptions>
 
 ## HGNetV2Config
 
diff --git a/docs/source/en/model_doc/mobilenet_v1.md b/docs/source/en/model_doc/mobilenet_v1.md
index 20768db861..cd42629e40 100644
--- a/docs/source/en/model_doc/mobilenet_v1.md
+++ b/docs/source/en/model_doc/mobilenet_v1.md
@@ -45,7 +45,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 
 </hfoption>
diff --git a/docs/source/en/model_doc/mobilenet_v2.md b/docs/source/en/model_doc/mobilenet_v2.md
index 5ddc4f0ea3..a97a721d88 100644
--- a/docs/source/en/model_doc/mobilenet_v2.md
+++ b/docs/source/en/model_doc/mobilenet_v2.md
@@ -46,7 +46,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 
 </hfoption>
diff --git a/docs/source/en/model_doc/swin.md b/docs/source/en/model_doc/swin.md
index 6a079eb1d5..2b7a711397 100644
--- a/docs/source/en/model_doc/swin.md
+++ b/docs/source/en/model_doc/swin.md
@@ -45,7 +45,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 </hfoption>
 
diff --git a/docs/source/en/model_doc/swinv2.md b/docs/source/en/model_doc/swinv2.md
index 0f71023e38..d1d3b15a77 100644
--- a/docs/source/en/model_doc/swinv2.md
+++ b/docs/source/en/model_doc/swinv2.md
@@ -42,7 +42,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 
 </hfoption>
diff --git a/docs/source/en/model_doc/vit.md b/docs/source/en/model_doc/vit.md
index d07006ac1b..d09fed4b3a 100644
--- a/docs/source/en/model_doc/vit.md
+++ b/docs/source/en/model_doc/vit.md
@@ -48,7 +48,7 @@ pipeline = pipeline(
     torch_dtype=torch.float16,
     device=0
 )
-pipeline(images="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
+pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
 ```
 
 </hfoption>
diff --git a/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py b/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py
index 71da4f055a..0baf0e7960 100644
--- a/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py
+++ b/src/transformers/models/hgnet_v2/modeling_hgnet_v2.py
@@ -351,11 +351,11 @@ class HGNetV2Backbone(HGNetV2PreTrainedModel, BackboneMixin):
         Examples:
 
         ```python
-        >>> from transformers import RTDetrResNetConfig, RTDetrResNetBackbone
+        >>> from transformers import HGNetV2Config, HGNetV2Backbone
         >>> import torch
 
-        >>> config = RTDetrResNetConfig()
-        >>> model = RTDetrResNetBackbone(config)
+        >>> config = HGNetV2Config()
+        >>> model = HGNetV2Backbone(config)
 
         >>> pixel_values = torch.randn(1, 3, 224, 224)
 
diff --git a/src/transformers/models/hgnet_v2/modular_hgnet_v2.py b/src/transformers/models/hgnet_v2/modular_hgnet_v2.py
index 4f898718e3..9ee306a27e 100644
--- a/src/transformers/models/hgnet_v2/modular_hgnet_v2.py
+++ b/src/transformers/models/hgnet_v2/modular_hgnet_v2.py
@@ -474,11 +474,11 @@ class HGNetV2Backbone(HGNetV2PreTrainedModel, BackboneMixin):
         Examples:
 
         ```python
-        >>> from transformers import RTDetrResNetConfig, RTDetrResNetBackbone
+        >>> from transformers import HGNetV2Config, HGNetV2Backbone
         >>> import torch
 
-        >>> config = RTDetrResNetConfig()
-        >>> model = RTDetrResNetBackbone(config)
+        >>> config = HGNetV2Config()
+        >>> model = HGNetV2Backbone(config)
 
         >>> pixel_values = torch.randn(1, 3, 224, 224)