From b8cdc262d5ed963ac4b9530f5350fd17a0eef0aa Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Wed, 4 Dec 2024 02:53:45 +0800 Subject: [PATCH] [docs] use device-agnostic instead of `cuda` (#35047) * fix on xpu * [run_all] * add the missing import for Image lib * add more devices in comment * bug fix * replace cuda --- docs/source/en/perf_torch_compile.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/source/en/perf_torch_compile.md b/docs/source/en/perf_torch_compile.md index acc424930b..2155a403b2 100644 --- a/docs/source/en/perf_torch_compile.md +++ b/docs/source/en/perf_torch_compile.md @@ -27,7 +27,7 @@ To compile any computer vision model of your choice, call `torch.compile()` on t ```diff from transformers import AutoModelForImageClassification -model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda") +model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(DEVICE) + model = torch.compile(model) ``` @@ -47,15 +47,17 @@ from PIL import Image import requests import numpy as np from transformers import AutoImageProcessor, AutoModelForImageClassification +from accelerate.test_utils.testing import get_backend +device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) url = 'http://images.cocodataset.org/val2017/000000039769.jpg' image = Image.open(requests.get(url, stream=True).raw) processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") -model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to("cuda") +model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device) model = torch.compile(model) -processed_input = processor(image, return_tensors='pt').to(device="cuda") +processed_input = processor(image, return_tensors='pt').to(device) with torch.no_grad(): _ = model(**processed_input) @@ -66,13 +68,15 @@ with torch.no_grad(): ```python from transformers import AutoImageProcessor, AutoModelForObjectDetection +from accelerate.test_utils.testing import get_backend +device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50") -model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to("cuda") +model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device) model = torch.compile(model) texts = ["a photo of a cat", "a photo of a dog"] -inputs = processor(text=texts, images=image, return_tensors="pt").to("cuda") +inputs = processor(text=texts, images=image, return_tensors="pt").to(device) with torch.no_grad(): _ = model(**inputs) @@ -82,11 +86,13 @@ with torch.no_grad(): ```python from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation +from accelerate.test_utils.testing import get_backend +device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512") -model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to("cuda") +model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to(device) model = torch.compile(model) -seg_inputs = processor(images=image, return_tensors="pt").to("cuda") +seg_inputs = processor(images=image, return_tensors="pt").to(device) with torch.no_grad(): _ = model(**seg_inputs)