From b8cdc262d5ed963ac4b9530f5350fd17a0eef0aa Mon Sep 17 00:00:00 2001
From: Fanli Lin <fanli.lin@intel.com>
Date: Wed, 4 Dec 2024 02:53:45 +0800
Subject: [PATCH] [docs] use device-agnostic code instead of `cuda` (#35047)

* fix on xpu

* [run_all]

* add the missing import for Image lib

* add more devices in comment

* bug fix

* replace cuda
---
 docs/source/en/perf_torch_compile.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/docs/source/en/perf_torch_compile.md b/docs/source/en/perf_torch_compile.md
index acc424930b..2155a403b2 100644
--- a/docs/source/en/perf_torch_compile.md
+++ b/docs/source/en/perf_torch_compile.md
@@ -27,7 +27,7 @@ To compile any computer vision model of your choice, call `torch.compile()` on t
 ```diff
 from transformers import AutoModelForImageClassification
 
-model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda")
+model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
 + model = torch.compile(model)
 ```
 
@@ -47,15 +47,17 @@ from PIL import Image
 import requests
 import numpy as np
 from transformers import AutoImageProcessor, AutoModelForImageClassification
+from accelerate.test_utils.testing import get_backend
 
+device, _, _ = get_backend()  # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
 image = Image.open(requests.get(url, stream=True).raw)
 
 processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
-model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to("cuda")
+model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
 model = torch.compile(model)
 
-processed_input = processor(image, return_tensors='pt').to(device="cuda")
+processed_input = processor(image, return_tensors='pt').to(device)
 
 with torch.no_grad():
     _ = model(**processed_input)
@@ -66,13 +68,15 @@ with torch.no_grad():
 
 ```python 
 from transformers import AutoImageProcessor, AutoModelForObjectDetection
+from accelerate.test_utils.testing import get_backend
 
+device, _, _ = get_backend()  # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
-model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to("cuda")
+model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)
 model = torch.compile(model)
 
 texts = ["a photo of a cat", "a photo of a dog"]
-inputs = processor(text=texts, images=image, return_tensors="pt").to("cuda")
+inputs = processor(text=texts, images=image, return_tensors="pt").to(device)
 
 with torch.no_grad():
     _ = model(**inputs)
@@ -82,11 +86,13 @@ with torch.no_grad():
 
 ```python 
 from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+from accelerate.test_utils.testing import get_backend
 
+device, _, _ = get_backend()  # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to("cuda")
+model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to(device)
 model = torch.compile(model)
-seg_inputs = processor(images=image, return_tensors="pt").to("cuda")
+seg_inputs = processor(images=image, return_tensors="pt").to(device)
 
 with torch.no_grad():
     _ = model(**seg_inputs)