enable cpu offloading for Bark on xpu (#37599)
* enable cpu offloading of bark modeling on XPU
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* remove debug print
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* fix style
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* fix review comments
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* enhance test
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* update
* add deprecate message
  Signed-off-by: YAO Matrix <matrix.yao@intel.com>
* update
* update
* trigger CI

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -36,6 +36,7 @@ from transformers.models.bark.generation_configuration_bark import (
|
||||
from transformers.testing_utils import (
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
@@ -1056,7 +1057,8 @@ class BarkModelIntegrationTests(unittest.TestCase):
|
||||
def inputs(self):
|
||||
input_ids = self.processor("In the light of the moon, a little egg lay on a leaf", voice_preset="en_speaker_6")
|
||||
|
||||
input_ids = input_ids.to(torch_device)
|
||||
for k, v in input_ids.items():
|
||||
input_ids[k] = v.to(torch_device)
|
||||
|
||||
return input_ids
|
||||
|
||||
@@ -1295,7 +1297,7 @@ class BarkModelIntegrationTests(unittest.TestCase):
|
||||
len(output_ids_with_min_eos_p[0, :].tolist()), len(output_ids_without_min_eos_p[0, :].tolist())
|
||||
)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_generate_end_to_end_with_offload(self):
|
||||
input_ids = self.inputs
|
||||
@@ -1304,15 +1306,17 @@ class BarkModelIntegrationTests(unittest.TestCase):
|
||||
# standard generation
|
||||
output_with_no_offload = self.model.generate(**input_ids, do_sample=False, temperature=1.0)
|
||||
|
||||
torch.cuda.empty_cache()
|
||||
torch_accelerator_module = getattr(torch, torch_device, torch.cuda)
|
||||
|
||||
memory_before_offload = torch.cuda.memory_allocated()
|
||||
torch_accelerator_module.empty_cache()
|
||||
|
||||
memory_before_offload = torch_accelerator_module.memory_allocated()
|
||||
model_memory_footprint = self.model.get_memory_footprint()
|
||||
|
||||
# activate cpu offload
|
||||
self.model.enable_cpu_offload()
|
||||
|
||||
memory_after_offload = torch.cuda.memory_allocated()
|
||||
memory_after_offload = torch_accelerator_module.memory_allocated()
|
||||
|
||||
# checks if the model have been offloaded
|
||||
|
||||
|
||||
Reference in New Issue
Block a user