Attn implementation for composite models (#32238)

* first try * codestyle * idefics2 is happy * [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo, paligemma * fix-copies * [run-slow] llava, llava_next, video_llava, vipllava, llava_next_video, idefics, idefics2, kosmos2, fuyu, blip, blip_2, instructblip, instructblipvideo * blip-2 needs to init vision from config * when was this removed O_o * minor fix * tests * this way? * tests * model-agnostic code * codestyle * add tests for idefics * modify general test for VLMs * no generation test for vlm yet! * no generation test here also * wanr in VIT-SDPA if output attn * add more tests * user can pass dict as attn impl * repo consistency * update * muicgen * no prints * forgot speech enc-dec and clip * how many composite models we have? * musicgen meelody is same as mudicgen * +siglip * fix tests + add some more * remove idefics custom overriden code * make idefics2 automappable * nits * skip tests * doctests * Update src/transformers/models/idefics2/configuration_idefics2.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/clip/test_modeling_clip.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/idefics2/test_modeling_idefics2.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update tests/models/idefics2/test_modeling_idefics2.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Update src/transformers/configuration_utils.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * major update, no need for automap * clean up * add FA2 test * more tests * style * skip tests * why did these started failing now? 
* no attributes for FA2 needed * one tiny test * address comment about FA2 false warning * style * add new models and resolve conflicts * fix copies * let it be this way for now, come back tomorrow to review * some more fixes * update * more updates * update * fix copies * style and tests * another big update * fix tests * fix tests * update * another update * fix tests * fix copies * fix tests --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-10-22 06:54:44 +02:00
parent 32590b5ecb
commit 21d5025826
64 changed files with 1925 additions and 713 deletions
--- a/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py
+++ b/tests/models/speech_encoder_decoder/test_modeling_speech_encoder_decoder.py
@@ -18,7 +18,13 @@ import tempfile
 import unittest

 from transformers import is_torch_available
-from transformers.testing_utils import require_deterministic_for_xpu, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_deterministic_for_xpu,
+    require_torch,
+    require_torch_sdpa,
+    slow,
+    torch_device,
+)

 from ...test_modeling_common import floats_tensor, ids_tensor, random_attention_mask
 from ..bert.test_modeling_bert import BertModelTester
@@ -441,6 +447,66 @@ class EncoderDecoderMixin:
                max_diff = np.amax(np.abs(out_1 - out_2))
                self.assertLessEqual(max_diff, 1e-5)

+    @require_torch_sdpa
+    def test_sdpa_can_dispatch_composite_models(self):
+        inputs_dict = self.prepare_config_and_inputs()
+        encoder_config, decoder_config = inputs_dict["config"], inputs_dict["decoder_config"]
+        config = SpeechEncoderDecoderConfig.from_encoder_decoder_configs(
+            encoder_config=encoder_config, decoder_config=decoder_config
+        )
+        model = SpeechEncoderDecoderModel(config=config)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            model.save_pretrained(tmpdirname)
+            model_sdpa = SpeechEncoderDecoderModel.from_pretrained(tmpdirname)
+            model_sdpa = model_sdpa.eval().to(torch_device)
+
+            # see https://github.com/huggingface/transformers/pull/32238
+            # Each sub-model is expected to dispatch to SDPA when it supports it (SDPA layers are checked for below)
+            encoder_attn = "sdpa" if model.encoder._supports_sdpa else "eager"
+            decoder_attn = "sdpa" if model.decoder._supports_sdpa else "eager"
+            self.assertTrue(model_sdpa.config._attn_implementation == "sdpa")
+            self.assertTrue(model_sdpa.encoder.config._attn_implementation == encoder_attn)
+            self.assertTrue(model_sdpa.decoder.config._attn_implementation == decoder_attn)
+
+            # Also test that nothing breaks if we request SDPA explicitly, when both sub-parts support it.
+            # If the model supports SDPA (i.e. all of its sub-models support it) we dispatch safely.
+            # Otherwise a ValueError is expected, because at least one sub-model doesn't support SDPA.
+            if encoder_attn == "sdpa" and decoder_attn == "sdpa":
+                model_sdpa_explicit = SpeechEncoderDecoderModel.from_pretrained(tmpdirname, attn_implementation="sdpa")
+                model_sdpa_explicit = model_sdpa_explicit.eval().to(torch_device)
+
+                self.assertTrue(model_sdpa_explicit.config._attn_implementation == "sdpa")
+            else:
+                with self.assertRaises(ValueError):
+                    model_sdpa_explicit = SpeechEncoderDecoderModel.from_pretrained(
+                        tmpdirname, attn_implementation="sdpa"
+                    )
+
+            model_eager = SpeechEncoderDecoderModel.from_pretrained(
+                tmpdirname,
+                attn_implementation="eager",
+            )
+            model_eager = model_eager.eval().to(torch_device)
+
+            self.assertTrue(model_eager.config._attn_implementation == "eager")
+            self.assertTrue(model_eager.encoder.config._attn_implementation == "eager")
+            self.assertTrue(model_eager.decoder.config._attn_implementation == "eager")
+
+            for name, submodule in model_eager.named_modules():
+                class_name = submodule.__class__.__name__
+                if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
+                    raise ValueError("The eager model should not have SDPA attention layers")
+
+            has_sdpa = False
+            for name, submodule in model_sdpa.named_modules():
+                class_name = submodule.__class__.__name__
+                if "SdpaAttention" in class_name or "SdpaSelfAttention" in class_name:
+                    has_sdpa = True
+                    break
+            if not has_sdpa:
+                raise ValueError("The SDPA model should have SDPA attention layers")
+

@require_torch
 class Wav2Vec2BertModelTest(EncoderDecoderMixin, unittest.TestCase):