🔴 [VLM] Add base model without head (#37033)

* i guess reverted all CdGen classes

* style

* llava onevision

* fix copies

* fix some tests

* some more tests

* dump

* skip these

* nevermind, i am dumb

* revert fix not needed

* fixup

* fixup

* another fixup

* more fixup to make ci finally happy

* fixup after rebasing

* fix qwen tests

* add internVL + typos here and there

* image token index -> id

* style

* fix init weights

* revert blip-2 not supported

* address comments

* fix copies

* revert blip2 test file as well

* as discussed internally, revert back CdGen models

* fix some tests

* fix more tests for compile

* CI red

* fix copies

* enumerate explicitly allowed models

* address comments

* fix tests

* fixup

* style again

* add tests for new model class

* another fixup ( x _ x )

* [fixup] unused attributes can be removed post-deprecation
This commit is contained in:
Raushan Turganbay
2025-05-07 17:47:51 +02:00
committed by GitHub
parent 3fa8d9c20e
commit 17742bd9c8
85 changed files with 7590 additions and 2904 deletions

View File

@@ -54,7 +54,11 @@ if is_torch_available():
import torch
from torch import nn
from transformers import InstructBlipVideoForConditionalGeneration, InstructBlipVideoVisionModel
from transformers import (
InstructBlipVideoForConditionalGeneration,
InstructBlipVideoModel,
InstructBlipVideoVisionModel,
)
class InstructBlipVideoVisionModelTester:
@@ -477,7 +481,6 @@ class InstructBlipVideoForConditionalGenerationDecoderOnlyModelTester:
"attention_mask": attention_mask,
"qformer_input_ids": qformer_input_ids,
"qformer_attention_mask": qformer_attention_mask,
"labels": input_ids,
}
return config, inputs_dict
@@ -486,7 +489,9 @@ class InstructBlipVideoForConditionalGenerationDecoderOnlyModelTester:
class InstructBlipVideoForConditionalGenerationDecoderOnlyTest(
ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
):
all_model_classes = (InstructBlipVideoForConditionalGeneration,) if is_torch_available() else ()
all_model_classes = (
(InstructBlipVideoForConditionalGeneration, InstructBlipVideoModel) if is_torch_available() else ()
)
fx_compatible = False
test_head_masking = False
test_pruning = False