Speedup model init on CPU (by 10x+ for llama-3-8B as one example) (#31771)
* 1,100%!
* Clean
* Don't touch DS
* Experiment with dtype allocation
* skip test_load_save_without_tied_weights test
* A little faster
* Include proper upscaling?
* Fixup tests
* Potentially skip?
* Let's see if this fixes git history
* Maintain new dtype
* Fin
* Rm hook idea for now
* New approach, see what breaks
* stage
* Clean
* Stash
* Should be fin now, just need to mark failing models
* Clean up
* Simplify
* Deal with weird models
* Enc/Dec
* Skip w/ reason
* Adjust test
* Fix test
* one more test
* Keep experimenting
* Fix ref
* TO REMOVE: testing feedback CI
* Right push
* Update tests/utils/test_modeling_utils.py
  Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
* disable
* Add new func
* Test nits from Amy
* Update src/transformers/modeling_utils.py
  Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
* Adjust comment
* Adjust comment on skip
* make private
* Fin
* Should be a not flag
* Clarify and rename test

---------

Co-authored-by: Marc Sun <marc@huggingface.co>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
@@ -512,6 +512,12 @@ class BartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
|
||||
|
||||
@@ -476,6 +476,12 @@ class BigBirdPegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
|
||||
|
||||
self.assertTrue(torch.allclose(outputs1, outputs2, atol=1e-5))
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
|
||||
@@ -758,6 +758,12 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
[encoder_expected_shape] * len(attentions),
|
||||
)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
class LongT5TGlobalModelTest(LongT5ModelTest):
|
||||
@@ -1097,6 +1103,12 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
[self.model_tester.num_attention_heads, block_len, 3 * block_len],
|
||||
)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest):
|
||||
def setUp(self):
|
||||
|
||||
@@ -778,6 +778,12 @@ class LxmertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
def test_save_load_low_cpu_mem_usage_no_safetensors(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
class LxmertModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
@@ -331,6 +331,12 @@ class M2M100ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
model.generate(input_ids, attention_mask=attention_mask)
|
||||
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
def _long_tensor(tok_lst):
|
||||
return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)
|
||||
|
||||
@@ -369,6 +369,12 @@ class MBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
||||
2,
|
||||
)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
|
||||
|
||||
@@ -346,6 +346,12 @@ class NllbMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
||||
self.assertIsNotNone(model(**input_dict)["encoder_router_logits"][1])
|
||||
self.assertIsNotNone(model(**input_dict)["decoder_router_logits"][0])
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
|
||||
@@ -323,6 +323,12 @@ class PLBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
|
||||
def test_sample_generate(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
|
||||
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
|
||||
|
||||
@@ -506,6 +506,12 @@ class SeamlessM4TModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase):
|
||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
def test_attention_outputs(self):
|
||||
# expected length is subsampled so need to change a bit this test
|
||||
if not self.has_attentions:
|
||||
@@ -758,6 +764,12 @@ class SeamlessM4TModelWithTextInputTest(
|
||||
def test_retain_grad_hidden_states_attentions(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
class SeamlessM4TGenerationTest(unittest.TestCase):
|
||||
|
||||
@@ -522,6 +522,12 @@ class SeamlessM4Tv2ModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase)
|
||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
def test_attention_outputs(self):
|
||||
# expected length is subsampled so need to change a bit this test
|
||||
if not self.has_attentions:
|
||||
@@ -748,6 +754,12 @@ class SeamlessM4Tv2ModelWithTextInputTest(ModelTesterMixin, GenerationTesterMixi
|
||||
def test_training_gradient_checkpointing_use_reentrant_false(self):
|
||||
pass
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
class SeamlessM4Tv2GenerationTest(unittest.TestCase):
|
||||
|
||||
@@ -720,6 +720,12 @@ class SwitchTransformersModelTest(ModelTesterMixin, GenerationTesterMixin, Pipel
|
||||
attn_weights = out[attn_name] if attn_name == attention_names[0] else out[attn_name][-1]
|
||||
self.assertEqual(sum([w.sum().item() for w in attn_weights]), 0.0)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
class SwitchTransformersEncoderOnlyModelTester:
|
||||
def __init__(
|
||||
@@ -843,6 +849,12 @@ class SwitchTransformersEncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_model_fp16_forward(*config_and_inputs)
|
||||
|
||||
@unittest.skip(
|
||||
reason="This architecure has tied weights by default and there is no way to remove it, check: https://github.com/huggingface/transformers/pull/31771#issuecomment-2210915245"
|
||||
)
|
||||
def test_load_save_without_tied_weights(self):
|
||||
pass
|
||||
|
||||
|
||||
def use_task_specific_params(model, task):
|
||||
model.config.update(model.config.task_specific_params[task])
|
||||
|
||||
Reference in New Issue
Block a user