[docs] use device-agnostic code instead of hard-coded cuda (#35047)

* fix on xpu
* [run_all]
* add the missing import for Image lib
* add more devices in comment
* bug fix
* replace cuda
2024-12-04 02:53:45 +08:00
parent 346597b644
commit b8cdc262d5
1 changed files with 13 additions and 7 deletions
--- a/docs/source/en/perf_torch_compile.md
+++ b/docs/source/en/perf_torch_compile.md
@@ -27,7 +27,7 @@ To compile any computer vision model of your choice, call `torch.compile()` on t
 ```diff
 from transformers import AutoModelForImageClassification

-model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda")
+model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
 + model = torch.compile(model)
 ```

@@ -47,15 +47,17 @@ from PIL import Image
 import requests
 import numpy as np
 from transformers import AutoImageProcessor, AutoModelForImageClassification
+from accelerate.test_utils.testing import get_backend

+device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
 image = Image.open(requests.get(url, stream=True).raw)

 processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
-model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to("cuda")
+model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
 model = torch.compile(model)

-processed_input = processor(image, return_tensors='pt').to(device="cuda")
+processed_input = processor(image, return_tensors='pt').to(device)

 with torch.no_grad():
    _ = model(**processed_input)
@@ -66,13 +68,15 @@ with torch.no_grad():

 ```python 
 from transformers import AutoImageProcessor, AutoModelForObjectDetection
+from accelerate.test_utils.testing import get_backend

+device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
-model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to("cuda")
+model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)
 model = torch.compile(model)

 texts = ["a photo of a cat", "a photo of a dog"]
-inputs = processor(text=texts, images=image, return_tensors="pt").to("cuda")
+inputs = processor(text=texts, images=image, return_tensors="pt").to(device)

 with torch.no_grad():
    _ = model(**inputs)
@@ -82,11 +86,13 @@ with torch.no_grad():

 ```python 
 from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+from accelerate.test_utils.testing import get_backend

+device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to("cuda")
+model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to(device)
 model = torch.compile(model)
-seg_inputs = processor(images=image, return_tensors="pt").to("cuda")
+seg_inputs = processor(images=image, return_tensors="pt").to(device)

 with torch.no_grad():
    _ = model(**seg_inputs)