🔴 [VLM] Add base model without head (#37033)

* i guess reverted all CdGen classes * style * llava onevision * fix copies * fix some tests * some more tests * dump * skip these * nevermind, i am dumb * revert fix not needed * fixup * fixup * another fixup * more fixup to make ci finally happy * fixup after rebasing * fix qwen tests * add internVL + typos here and there * image token index -> id * style * fix init weights * revert blip-2 not supported * address comments * fix copies * revert blip2 test file as well * as discussed internally, revert back CdGen models * fix some tests * fix more tests for compile * CI red * fix copies * enumerate explicitly allowed models * address comments * fix tests * fixup * style again * add tests for new model class * another fixup ( x _ x ) * [fixup] unused attributes can be removed post-deprecation
2025-05-07 17:47:51 +02:00
parent 3fa8d9c20e
commit 17742bd9c8
85 changed files with 7590 additions and 2904 deletions
--- a/tests/models/instructblip/test_modeling_instructblip.py
+++ b/tests/models/instructblip/test_modeling_instructblip.py
@@ -54,7 +54,7 @@ if is_torch_available():
    import torch
    from torch import nn

-    from transformers import InstructBlipForConditionalGeneration, InstructBlipVisionModel
+    from transformers import InstructBlipForConditionalGeneration, InstructBlipModel, InstructBlipVisionModel


 if is_vision_available():
@@ -460,14 +460,20 @@ class InstructBlipForConditionalGenerationDecoderOnlyModelTester:
            "attention_mask": attention_mask,
            "qformer_input_ids": qformer_input_ids,
            "qformer_attention_mask": qformer_attention_mask,
-            "labels": input_ids,
        }
        return config, inputs_dict


@require_torch
 class InstructBlipForConditionalGenerationDecoderOnlyTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
-    all_model_classes = (InstructBlipForConditionalGeneration,) if is_torch_available() else ()
+    all_model_classes = (
+        (
+            InstructBlipModel,
+            InstructBlipForConditionalGeneration,
+        )
+        if is_torch_available()
+        else ()
+    )
    pipeline_model_mapping = {"image-text-to-text": InstructBlipForConditionalGeneration}
    fx_compatible = False
    test_head_masking = False