From 6ba8a1ff4550b4450a22a0b0d907312955ce0fd5 Mon Sep 17 00:00:00 2001
From: Kyle Duffy <155960770+kyle-cohere@users.noreply.github.com>
Date: Thu, 31 Jul 2025 13:58:45 +0200
Subject: [PATCH] Update documentation for Cohere2Vision models (#39817)

* Update docs with pipeline example

* Add Cohere2Vision to list of vision models

* Sort models
---
 docs/source/en/model_doc/cohere2_vision.md    | 33 ++++++++++++++++++-
 src/transformers/models/auto/modeling_auto.py |  1 +
 2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/docs/source/en/model_doc/cohere2_vision.md b/docs/source/en/model_doc/cohere2_vision.md
index 123f9573b9..b0fcddc6d3 100644
--- a/docs/source/en/model_doc/cohere2_vision.md
+++ b/docs/source/en/model_doc/cohere2_vision.md
@@ -19,9 +19,12 @@ Command A Vision is built upon a robust architecture that leverages the latest a
 
 The model and image processor can be loaded as follows:
 
-```python
+<hfoptions id="usage">
+<hfoption id="AutoModel">
 
+```python
 import torch
+
 from transformers import AutoProcessor, AutoModelForImageTextToText
 
 model_id = "CohereLabs/command-a-vision-07-2025"
@@ -68,6 +71,34 @@ print(
 )
 ```
 
+</hfoption>
+<hfoption id="Pipeline">
+
+```python
+from transformers import pipeline
+
+pipe = pipeline(model="CohereLabs/command-a-vision-07-2025", task="image-text-to-text", device_map="auto")
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "url": "https://media.istockphoto.com/id/458012057/photo/istanbul-turkey.jpg?s=612x612&w=0&k=20&c=qogAOVvkpfUyqLUMr_XJQyq-HkACXyYUSZbKhBlPrxo=",
+            },
+            {"type": "text", "text": "Where was this taken?"},
+        ],
+    },
+]
+
+outputs = pipe(text=messages, max_new_tokens=300, return_full_text=False)
+
+print(outputs)
+```
+</hfoption>
+</hfoptions>
+
 ## Cohere2VisionConfig
 
 [[autodoc]] Cohere2VisionConfig
diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index ea69ed911d..259a297bf6 100644
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -712,6 +712,7 @@ MODEL_FOR_IMAGE_MAPPING_NAMES = OrderedDict(
         ("aimv2_vision_model", "Aimv2VisionModel"),
         ("beit", "BeitModel"),
         ("bit", "BitModel"),
+        ("cohere2_vision", "Cohere2VisionModel"),
         ("conditional_detr", "ConditionalDetrModel"),
         ("convnext", "ConvNextModel"),
         ("convnextv2", "ConvNextV2Model"),