Add support for nested images to LLava and VipLLava (#35558)

* move make_flat_list_of_images and make_batched_videos to image_utils

* remove unnecessary is_vision_available

* move make_nested_list_of_images to image_utils

* fix fast pixtral image processor

* fix mllama import

* fix make_nested_list_of_images

* add tests

* convert 4d arrays/tensors to list

* add test_make_batched_videos

* add support for nested batches of videos

* fix qwen2vl image processing
This commit is contained in:
Yoni Gozlan
2025-01-30 16:49:20 -05:00
committed by GitHub
parent e4227eb4d4
commit d7188ba600
27 changed files with 506 additions and 485 deletions

View File

@@ -39,7 +39,7 @@ from transformers.testing_utils import (
slow,
torch_device,
)
from transformers.utils import is_torch_available, is_vision_available
from transformers.utils import is_torch_available
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
@@ -58,10 +58,6 @@ if is_torch_available():
from transformers import InstructBlipVideoForConditionalGeneration, InstructBlipVideoVisionModel
if is_vision_available():
pass
class InstructBlipVideoVisionModelTester:
def __init__(
self,