Fix CPU offload + disk offload tests (#27204)
Fix disk offload tests + weight sharing issues
This commit is contained in:
@@ -182,7 +182,11 @@ class VitDetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
# TODO: Fix me (once this model gets more usage)
|
||||
@unittest.skip("Does not work on the tiny model as we keep hitting edge cases.")
|
||||
def test_disk_offload(self):
|
||||
def test_disk_offload_bin(self):
|
||||
super().test_disk_offload()
|
||||
|
||||
@unittest.skip("Does not work on the tiny model as we keep hitting edge cases.")
|
||||
def test_disk_offload_safetensors(self):
|
||||
super().test_disk_offload()
|
||||
|
||||
# TODO: Fix me (once this model gets more usage)
|
||||
|
||||
@@ -1788,7 +1788,11 @@ class WhisperEncoderModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.")
|
||||
def test_disk_offload(self):
|
||||
def test_disk_offload_bin(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.")
|
||||
def test_disk_offload_safetensors(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(reason="Some undefined behavior encountered with tiny versions of this model. Skip for now.")
|
||||
|
||||
@@ -2578,7 +2578,45 @@ class ModelTesterMixin:
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
def test_disk_offload(self):
|
||||
def test_disk_offload_bin(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
if model_class._no_split_modules is None:
|
||||
continue
|
||||
|
||||
inputs_dict_class = self._prepare_for_class(inputs_dict, model_class)
|
||||
model = model_class(config).eval()
|
||||
model = model.to(torch_device)
|
||||
torch.manual_seed(0)
|
||||
base_output = model(**inputs_dict_class)
|
||||
|
||||
model_size = compute_module_sizes(model)[""]
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.cpu().save_pretrained(tmp_dir, safe_serialization=False)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
max_size = int(self.model_split_percents[0] * model_size)
|
||||
max_memory = {0: max_size, "cpu": max_size}
|
||||
# This errors out because it's missing an offload folder
|
||||
new_model = model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
|
||||
|
||||
max_size = int(self.model_split_percents[1] * model_size)
|
||||
max_memory = {0: max_size, "cpu": max_size}
|
||||
new_model = model_class.from_pretrained(
|
||||
tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
|
||||
)
|
||||
|
||||
self.check_device_map_is_respected(new_model, new_model.hf_device_map)
|
||||
torch.manual_seed(0)
|
||||
new_output = new_model(**inputs_dict_class)
|
||||
|
||||
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
def test_disk_offload_safetensors(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
@@ -2595,17 +2633,11 @@ class ModelTesterMixin:
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
model.cpu().save_pretrained(tmp_dir)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
max_size = int(self.model_split_percents[0] * model_size)
|
||||
max_memory = {0: max_size, "cpu": max_size}
|
||||
# This errors out because it's missing an offload folder
|
||||
new_model = model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
|
||||
|
||||
max_size = int(self.model_split_percents[1] * model_size)
|
||||
max_memory = {0: max_size, "cpu": max_size}
|
||||
new_model = model_class.from_pretrained(
|
||||
tmp_dir, device_map="auto", max_memory=max_memory, offload_folder=tmp_dir
|
||||
)
|
||||
|
||||
# This doesn't error out as it's in safetensors and doesn't need an offload folder
|
||||
new_model = model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
|
||||
|
||||
self.check_device_map_is_respected(new_model, new_model.hf_device_map)
|
||||
torch.manual_seed(0)
|
||||
|
||||
Reference in New Issue
Block a user