[tests] make more tests device-agnostic (#33580)

* enable

* fix

* add xpu skip

* add marker

* skip for xpu

* add more

* enable on accelerator

* add more cases

* add more tests

* add more

This commit is contained in:
Fanli Lin
2024-09-20 17:16:43 +08:00
committed by GitHub
parent 31650a53a1
commit 8bd1f2f338
7 changed files with 24 additions and 17 deletions

View File

@@ -30,7 +30,7 @@ from transformers.file_utils import cached_property
from transformers.testing_utils import ( from transformers.testing_utils import (
require_timm, require_timm,
require_torch, require_torch,
require_torch_gpu, require_torch_accelerator,
require_vision, require_vision,
slow, slow,
torch_device, torch_device,
@@ -676,7 +676,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes, atol=1e-2)) self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes, atol=1e-2))
self.assertListEqual(results["labels"], expected_labels) self.assertListEqual(results["labels"], expected_labels)
@require_torch_gpu @require_torch_accelerator
def test_inference_object_detection_head_equivalence_cpu_gpu(self): def test_inference_object_detection_head_equivalence_cpu_gpu(self):
processor = self.default_processor processor = self.default_processor
image = prepare_img() image = prepare_img()
@@ -690,8 +690,8 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
cpu_outputs = model(**encoding) cpu_outputs = model(**encoding)
# 2. run model on GPU # 2. run model on GPU
model.to("cuda") model.to(torch_device)
encoding = encoding.to("cuda") encoding = encoding.to(torch_device)
with torch.no_grad(): with torch.no_grad():
gpu_outputs = model(**encoding) gpu_outputs = model(**encoding)

View File

@@ -24,10 +24,12 @@ from parameterized import parameterized
from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.testing_utils import ( from transformers.testing_utils import (
backend_empty_cache,
require_bitsandbytes, require_bitsandbytes,
require_flash_attn, require_flash_attn,
require_read_token, require_read_token,
require_torch, require_torch,
require_torch_accelerator,
require_torch_gpu, require_torch_gpu,
require_torch_sdpa, require_torch_sdpa,
slow, slow,
@@ -899,11 +901,11 @@ class LlamaIntegrationTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
class Mask4DTestHard(unittest.TestCase): class Mask4DTestHard(unittest.TestCase):
def tearDown(self): def tearDown(self):
gc.collect() gc.collect()
torch.cuda.empty_cache() backend_empty_cache(torch_device)
def setUp(self): def setUp(self):
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

View File

@@ -29,6 +29,7 @@ from transformers.testing_utils import (
require_flash_attn, require_flash_attn,
require_read_token, require_read_token,
require_torch, require_torch,
require_torch_accelerator,
require_torch_gpu, require_torch_gpu,
require_torch_sdpa, require_torch_sdpa,
slow, slow,
@@ -719,14 +720,14 @@ class MistralIntegrationTest(unittest.TestCase):
@slow @slow
@require_torch_gpu @require_torch_accelerator
class Mask4DTestHard(unittest.TestCase): class Mask4DTestHard(unittest.TestCase):
model_name = "mistralai/Mistral-7B-v0.1" model_name = "mistralai/Mistral-7B-v0.1"
_model = None _model = None
def tearDown(self): def tearDown(self):
gc.collect() gc.collect()
torch.cuda.empty_cache() backend_empty_cache(torch_device)
@property @property
def model(self): def model(self):

View File

@@ -21,7 +21,7 @@ from transformers.testing_utils import (
require_bitsandbytes, require_bitsandbytes,
require_read_token, require_read_token,
require_torch, require_torch,
require_torch_gpu, require_torch_accelerator,
slow, slow,
torch_device, torch_device,
) )
@@ -418,7 +418,7 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
pass pass
@require_torch_gpu @require_torch_accelerator
@slow @slow
class RecurrentGemmaIntegrationTest(unittest.TestCase): class RecurrentGemmaIntegrationTest(unittest.TestCase):
input_text = ["Hello I am doing", "Hi today"] input_text = ["Hello I am doing", "Hi today"]

View File

@@ -21,9 +21,10 @@ from datasets import Audio, load_dataset
from transformers import UnivNetConfig, UnivNetFeatureExtractor from transformers import UnivNetConfig, UnivNetFeatureExtractor
from transformers.testing_utils import ( from transformers.testing_utils import (
backend_empty_cache,
is_torch_available, is_torch_available,
require_torch, require_torch,
require_torch_gpu, require_torch_accelerator,
slow, slow,
torch_device, torch_device,
) )
@@ -207,13 +208,13 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1") self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")
@require_torch_gpu @require_torch_accelerator
@slow @slow
class UnivNetModelIntegrationTests(unittest.TestCase): class UnivNetModelIntegrationTests(unittest.TestCase):
def tearDown(self): def tearDown(self):
super().tearDown() super().tearDown()
gc.collect() gc.collect()
torch.cuda.empty_cache() backend_empty_cache(torch_device)
def _load_datasamples(self, num_samples, sampling_rate=24000): def _load_datasamples(self, num_samples, sampling_rate=24000):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

View File

@@ -34,10 +34,12 @@ from transformers.testing_utils import (
is_flaky, is_flaky,
is_pt_flax_cross_test, is_pt_flax_cross_test,
require_flash_attn, require_flash_attn,
require_non_xpu,
require_torch, require_torch,
require_torch_accelerator,
require_torch_fp16, require_torch_fp16,
require_torch_gpu, require_torch_gpu,
require_torch_multi_gpu, require_torch_multi_accelerator,
require_torchaudio, require_torchaudio,
slow, slow,
torch_device, torch_device,
@@ -2612,6 +2614,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
self.assertTrue(prompt in text) self.assertTrue(prompt in text)
@require_non_xpu
@slow @slow
@require_torch_gpu @require_torch_gpu
def test_speculative_decoding_distil(self): def test_speculative_decoding_distil(self):
@@ -3239,7 +3242,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
for i in range(num_samples): for i in range(num_samples):
assert decoded_all[i] == EXPECTED_TEXT[i] assert decoded_all[i] == EXPECTED_TEXT[i]
@require_torch_gpu @require_torch_accelerator
@slow @slow
def test_whisper_empty_longform(self): def test_whisper_empty_longform(self):
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny") processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
@@ -3278,7 +3281,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
torch.manual_seed(0) torch.manual_seed(0)
model.generate(**inputs, **gen_kwargs) model.generate(**inputs, **gen_kwargs)
@require_torch_multi_gpu @require_torch_multi_accelerator
@slow @slow
def test_whisper_empty_longform_multi_gpu(self): def test_whisper_empty_longform_multi_gpu(self):
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny") processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")

View File

@@ -4751,7 +4751,7 @@ class ModelTesterMixin:
# For now, Let's focus only on GPU for `torch.compile` # For now, Let's focus only on GPU for `torch.compile`
@slow @slow
@require_torch_gpu @require_torch_accelerator
@require_read_token @require_read_token
def test_torch_compile(self): def test_torch_compile(self):
if version.parse(torch.__version__) < version.parse("2.3"): if version.parse(torch.__version__) < version.parse("2.3"):