Attn implementation for composite models (#32238)

* first try

* codestyle

* idefics2 is happy

* [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo, paligemma

* fix-copies

* [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo

* blip-2 needs to init vision from config

* when was this removed O_o

* minor fix

* tests

* this way?

* tests

* model-agnostic code

* codestyle

* add tests for idefics

* modify general test for VLMs

* no generation test for vlm yet!

* no generation test here also

* wanr in VIT-SDPA if output attn

* add more tests

* user can pass dict as attn impl

* repo consistency

* update

* muicgen

* no prints

* forgot speech enc-dec and clip

* how many composite models we have?

* musicgen meelody is same as mudicgen

* +siglip

* fix tests + add some more

* remove idefics custom overriden code

* make idefics2 automappable

* nits

* skip tests

* doctests

* Update src/transformers/models/idefics2/configuration_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/clip/test_modeling_clip.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/idefics2/test_modeling_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/idefics2/test_modeling_idefics2.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/configuration_utils.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* major update, no need for automap

* clean up

* add FA2 test

* more tests

* style

* skip tests

* why did these started failing now?

* no attributes for FA2 needed

* one tiny test

* address comment about FA2 false warning

* style

* add new models and resolve conflicts

* fix copies

* let it be this way for now, come back tomorrow to review

* some more fixes

* update

* more updates

* update

* fix copies

* style and tests

* another big update

* fix tests

* fix tests

* update

* another update

* fix tests

* fix copies

* fix tests

---------

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Raushan Turganbay
2024-10-22 06:54:44 +02:00
committed by GitHub
parent 32590b5ecb
commit 21d5025826
64 changed files with 1925 additions and 713 deletions

View File

@@ -71,6 +71,51 @@ if is_vision_available():
class SiglipModelTesterMixin(ModelTesterMixin):
def test_sdpa_can_dispatch_composite_models(self):
for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = model_class(config)
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
# Load the model with SDPA
model_sdpa = model_class.from_pretrained(tmpdirname)
model_sdpa = model_sdpa.eval().to(torch_device)
# Load model with eager attention
model_eager = model_class.from_pretrained(
tmpdirname,
attn_implementation="eager",
)
model_eager = model_eager.eval().to(torch_device)
# SigLip has one shared cls attr for all models, so we assign both submodels heer
vision_attn = text_attn = "sdpa" if model._supports_sdpa else "eager"
if hasattr(model_sdpa, "vision_model") and hasattr(model_sdpa, "text_model"):
self.assertTrue(model_sdpa.vision_model.config._attn_implementation == vision_attn)
self.assertTrue(model_sdpa.text_model.config._attn_implementation == text_attn)
self.assertTrue(model_eager.vision_model.config._attn_implementation == "eager")
self.assertTrue(model_eager.text_model.config._attn_implementation == "eager")
self.assertTrue(model_sdpa.config._attn_implementation == "sdpa")
self.assertTrue(model_eager.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa and model_sdpa.config.model_type != "falcon":
raise ValueError("The SDPA model should have SDPA attention layers")
def test_eager_matches_sdpa_inference(
self,
torch_dtype: str,
@@ -132,23 +177,6 @@ class SiglipModelTesterMixin(ModelTesterMixin):
)
model_eager = model_eager.eval().to(torch_device)
self.assertTrue(model_sdpa.config._attn_implementation == "sdpa")
self.assertTrue(model_eager.config._attn_implementation == "eager")
for name, submodule in model_eager.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
raise ValueError("The eager model should not have SDPA attention layers")
has_sdpa = False
for name, submodule in model_sdpa.named_modules():
class_name = submodule.__class__.__name__
if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
has_sdpa = True
break
if not has_sdpa and model_sdpa.config.model_type != "falcon":
raise ValueError("The SDPA model should have SDPA attention layers")
# We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving the model each time,
# but it would be nicer to have an efficient way to use parameterized.expand
cases = [
@@ -400,6 +428,10 @@ class SiglipVisionModelTest(SiglipModelTesterMixin, unittest.TestCase):
use_attention_mask_options=(False,),
)
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
super().test_sdpa_can_dispatch_composite_models()
class SiglipTextModelTester:
def __init__(
@@ -562,6 +594,10 @@ class SiglipTextModelTest(SiglipModelTesterMixin, unittest.TestCase):
use_attention_mask_options=(False, True),
)
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
super().test_sdpa_can_dispatch_composite_models()
class SiglipModelTester:
def __init__(self, parent, text_kwargs=None, vision_kwargs=None, is_training=True):
@@ -629,6 +665,7 @@ class SiglipModelTest(SiglipModelTesterMixin, PipelineTesterMixin, unittest.Test
test_cpu_offload = False
test_disk_offload_safetensors = False
test_disk_offload_bin = False
_is_composite = True
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTest.setUp with CLIP->Siglip
def setUp(self):
@@ -851,6 +888,10 @@ class SiglipModelTest(SiglipModelTesterMixin, PipelineTesterMixin, unittest.Test
use_attention_mask_options=(False, True),
)
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
super().test_sdpa_can_dispatch_composite_models()
class SiglipForImageClassificationModelTester(SiglipModelTester):
def __init__(self, parent):
@@ -888,6 +929,7 @@ class SiglipForImageClassificationModelTest(SiglipModelTesterMixin, PipelineTest
test_cpu_offload = False
test_disk_offload_safetensors = False
test_disk_offload_bin = False
_is_composite = True
def setUp(self):
self.model_tester = SiglipForImageClassificationModelTester(self)
@@ -925,6 +967,10 @@ class SiglipForImageClassificationModelTest(SiglipModelTesterMixin, PipelineTest
torch_dtype=torch_dtype, logit_keys=("logits",), use_attention_mask_options=(False,)
)
@require_torch_sdpa
def test_sdpa_can_dispatch_composite_models(self):
super().test_sdpa_can_dispatch_composite_models()
# We will verify our results on an image of cute cats
def prepare_img():