Attn implementation for composite models (#32238)

* first try

* codestyle

* idefics2 is happy

* [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo, paligemma

* fix-copies

* [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo

* blip-2 needs to init vision from config

* when was this removed O_o

* minor fix

* tests

* this way?

* tests

* model-agnostic code

* codestyle

* add tests for idefics

* modify general test for VLMs

* no generation test for vlm yet!

* no generation test here also

* wanr in VIT-SDPA if output attn

* add more tests

* user can pass dict as attn impl

* repo consistency

* update

* muicgen

* no prints

* forgot speech enc-dec and clip

* how many composite models we have?

* musicgen meelody is same as mudicgen

* +siglip

* fix tests + add some more

* remove idefics custom overriden code

* make idefics2 automappable

* nits

* skip tests

* doctests

* Update src/transformers/models/idefics2/configuration_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/clip/test_modeling_clip.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/idefics2/test_modeling_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/idefics2/test_modeling_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/configuration_utils.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* major update, no need for automap

* clean up

* add FA2 test

* more tests

* style

* skip tests

* why did these started failing now?

* no attributes for FA2 needed

* one tiny test

* address comment about FA2 false warning

* style

* add new models and resolve conflicts

* fix copies

* let it be this way for now, come back tomorrow to review

* some more fixes

* update

* more updates

* update

* fix copies

* style and tests

* another big update

* fix tests

* fix tests

* update

* another update

* fix tests

* fix copies

* fix tests

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Raushan Turganbay
2024-10-22 06:54:44 +02:00
committed by GitHub
parent 32590b5ecb
commit 21d5025826
64 changed files with 1925 additions and 713 deletions

View File

@@ -27,6 +27,7 @@ from transformers.testing_utils import (
require_torch_fp16,
require_torch_gpu,
require_torch_multi_accelerator,
require_torch_sdpa,
require_vision,
slow,
torch_device,
@@ -456,6 +457,7 @@ class Blip2ForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationT
test_resize_embeddings = False
test_attention_outputs = False
test_torchscript = False
_is_composite = True
def setUp(self):
self.model_tester = Blip2ForConditionalGenerationDecoderOnlyModelTester(self)
@@ -488,6 +490,66 @@ class Blip2ForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationT
def test_save_load_fast_init_to_base(self):
pass
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
"""
Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model.
This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention".
In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model
is loaded, because we manually replicate requested attn implementation on each sub-config when loading.
See https://github.com/huggingface/transformers/pull/32238 for more info
The test tries to cover most general cases of composite models, VLMs with vision and text configs. Any model
that has a different set of sub-configs has to overwrite this test.
"""
if not self.has_attentions:
self.skipTest(reason="Model architecture does not support attentions")
if not self._is_composite:
self.skipTest(f"{self.all_model_classes[0].__name__} does not support SDPA")
for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = model_class(config)
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model_sdpa = model_class.from_pretrained(tmpdirname)
model_sdpa = model_sdpa.eval().to(torch_device)
text_attn = "sdpa" if model.language_model._supports_sdpa else "eager"
vision_attn = "sdpa" if model.vision_model._supports_sdpa else "eager"
qformer_attn = "sdpa" if model.qformer._supports_sdpa else "eager"
# `None` as it is the requested one which will be assigned to each sub-config
# Sub-model will dispatch to SDPA if it can (checked below that `SDPA` layers are present)
self.assertTrue(model.language_model.config._attn_implementation == text_attn)
self.assertTrue(model.vision_model.config._attn_implementation == vision_attn)
self.assertTrue(model.qformer.config._attn_implementation == qformer_attn)
model_eager = model_class.from_pretrained(tmpdirname, attn_implementation="eager")
model_eager = model_eager.eval().to(torch_device)
self.assertTrue(model_eager.config._attn_implementation == "eager")
self.assertTrue(model_eager.language_model.config._attn_implementation == "eager")
self.assertTrue(model_eager.vision_model.config._attn_implementation == "eager")
self.assertTrue(model_eager.qformer.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa and any(
module_attn == "sdpa" for module_attn in [text_attn, vision_attn, qformer_attn]
):
raise ValueError("The SDPA model should have SDPA attention layers")
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -715,6 +777,7 @@ class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, GenerationTesterMixi
test_resize_embeddings = False
test_attention_outputs = False
test_torchscript = False
_is_composite = True
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
@@ -768,6 +831,66 @@ class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, GenerationTesterMixi
def test_cpu_offload(self):
pass
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
"""
Tests if composite models dispatch correctly on SDPA/eager when requested so when loading the model.
This tests only by looking at layer names, as usually SDPA layers are calles "SDPAAttention".
In contrast to the above test, this one checks if the "config._attn_implamentation" is a dict after the model
is loaded, because we manually replicate requested attn implementation on each sub-config when loading.
See https://github.com/huggingface/transformers/pull/32238 for more info
The test tries to cover most general cases of composite models, VLMs with vision and text configs. Any model
that has a different set of sub-configs has to overwrite this test.
"""
if not self.has_attentions:
self.skipTest(reason="Model architecture does not support attentions")
if not self._is_composite:
self.skipTest(f"{self.all_model_classes[0].__name__} does not support SDPA")
for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = model_class(config)
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model_sdpa = model_class.from_pretrained(tmpdirname)
model_sdpa = model_sdpa.eval().to(torch_device)
text_attn = "sdpa" if model.language_model._supports_sdpa else "eager"
vision_attn = "sdpa" if model.vision_model._supports_sdpa else "eager"
qformer_attn = "sdpa" if model.qformer._supports_sdpa else "eager"
# `None` as it is the requested one which will be assigned to each sub-config
# Sub-model will dispatch to SDPA if it can (checked below that `SDPA` layers are present)
self.assertTrue(model.language_model.config._attn_implementation == text_attn)
self.assertTrue(model.vision_model.config._attn_implementation == vision_attn)
self.assertTrue(model.qformer.config._attn_implementation == qformer_attn)
model_eager = model_class.from_pretrained(tmpdirname, attn_implementation="eager")
model_eager = model_eager.eval().to(torch_device)
self.assertTrue(model_eager.config._attn_implementation == "eager")
self.assertTrue(model_eager.language_model.config._attn_implementation == "eager")
self.assertTrue(model_eager.vision_model.config._attn_implementation == "eager")
self.assertTrue(model_eager.qformer.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa and any(
module_attn == "sdpa" for module_attn in [text_attn, vision_attn, qformer_attn]
):
raise ValueError("The SDPA model should have SDPA attention layers")
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()