enable misc cases on XPU & use device agnostic APIs for cases in tests (#38192)
* use device agnostic APIs in tests

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* more

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* fix style

Signed-off-by: Matrix Yao <matrix.yao@intel.com>

* add reset_peak_memory_stats API

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* update

---------

Signed-off-by: Matrix Yao <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -30,6 +30,7 @@ from transformers import (
|
||||
)
|
||||
from transformers.models.idefics3 import Idefics3VisionConfig
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_bitsandbytes,
|
||||
require_torch,
|
||||
require_torch_large_accelerator,
|
||||
@@ -302,7 +303,7 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
@slow
|
||||
@require_torch_large_accelerator
|
||||
|
||||
@@ -17,7 +17,11 @@ import unittest
|
||||
from functools import lru_cache
|
||||
|
||||
from transformers import CohereTokenizerFast
|
||||
from transformers.testing_utils import require_jinja, require_tokenizers, require_torch_multi_gpu
|
||||
from transformers.testing_utils import (
|
||||
require_jinja,
|
||||
require_tokenizers,
|
||||
require_torch_multi_accelerator,
|
||||
)
|
||||
|
||||
from ...test_tokenization_common import TokenizerTesterMixin, use_cache_if_possible
|
||||
|
||||
@@ -55,7 +59,7 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
|
||||
return CohereTokenizerFast.from_pretrained(pretrained_name, **kwargs)
|
||||
|
||||
# This gives CPU OOM on a single-gpu runner (~60G RAM). On multi-gpu runner, it has ~180G RAM which is enough.
|
||||
@require_torch_multi_gpu
|
||||
@require_torch_multi_accelerator
|
||||
def test_torch_encode_plus_sent_to_model(self):
|
||||
super().test_torch_encode_plus_sent_to_model()
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ from transformers.models.colpali.configuration_colpali import ColPaliConfig
|
||||
from transformers.models.colpali.modeling_colpali import ColPaliForRetrieval, ColPaliForRetrievalOutput
|
||||
from transformers.models.colpali.processing_colpali import ColPaliProcessor
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_torch,
|
||||
require_vision,
|
||||
slow,
|
||||
@@ -303,7 +304,7 @@ class ColPaliModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
@slow
|
||||
def test_model_integration_test(self):
|
||||
|
||||
@@ -35,7 +35,7 @@ from transformers.testing_utils import (
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_gpu,
|
||||
require_torch_multi_accelerator,
|
||||
require_torch_sdpa,
|
||||
slow,
|
||||
torch_device,
|
||||
@@ -583,7 +583,7 @@ class Idefics2ForConditionalGenerationIntegrationTest(unittest.TestCase):
|
||||
cleanup(torch_device, gc_collect=True)
|
||||
|
||||
@slow
|
||||
@require_torch_multi_gpu
|
||||
@require_torch_multi_accelerator
|
||||
def test_integration_test(self):
|
||||
model = Idefics2ForConditionalGeneration.from_pretrained(
|
||||
"HuggingFaceM4/idefics2-8b-base",
|
||||
|
||||
@@ -31,7 +31,7 @@ from transformers import (
|
||||
is_torch_available,
|
||||
is_vision_available,
|
||||
)
|
||||
from transformers.testing_utils import require_soundfile, require_torch, slow, torch_device
|
||||
from transformers.testing_utils import backend_empty_cache, require_soundfile, require_torch, slow, torch_device
|
||||
from transformers.utils import is_soundfile_available
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -296,7 +296,7 @@ class Phi4MultimodalIntegrationTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def test_text_only_generation(self):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
|
||||
@@ -29,6 +29,7 @@ from transformers import (
|
||||
is_vision_available,
|
||||
)
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
is_flaky,
|
||||
require_cv2,
|
||||
require_flash_attn,
|
||||
@@ -421,7 +422,7 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
@slow
|
||||
def test_small_model_integration_test(self):
|
||||
|
||||
@@ -28,6 +28,7 @@ from transformers import (
|
||||
is_vision_available,
|
||||
)
|
||||
from transformers.testing_utils import (
|
||||
backend_empty_cache,
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
@@ -367,7 +368,7 @@ class Qwen2VLIntegrationTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
@slow
|
||||
def test_small_model_integration_test(self):
|
||||
|
||||
@@ -32,7 +32,6 @@ from transformers import WhisperConfig
|
||||
from transformers.testing_utils import (
|
||||
is_flaky,
|
||||
require_flash_attn,
|
||||
require_non_xpu,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
@@ -42,7 +41,7 @@ from transformers.testing_utils import (
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property, is_torch_available, is_torchaudio_available
|
||||
from transformers.utils import cached_property, is_torch_available, is_torch_xpu_available, is_torchaudio_available
|
||||
from transformers.utils.import_utils import is_datasets_available
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
@@ -2431,11 +2430,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
|
||||
" How many different species are there in the chilli? How many different species are there in the chilli?",
|
||||
)
|
||||
|
||||
@require_non_xpu
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_speculative_decoding_distil(self):
|
||||
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
||||
torch_dtype = torch.float16 if (torch.cuda.is_available() or is_torch_xpu_available()) else torch.float32
|
||||
model_id = "openai/whisper-large-v2"
|
||||
model = WhisperForConditionalGeneration.from_pretrained(
|
||||
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
||||
|
||||
Reference in New Issue
Block a user