[video processor] support torchcodec and decrease cuda memory usage (#38880)

* don't move the whole video to GPU

* add torchcodec

* add tests

* make style

* instructblip as well

* consistency

* Update src/transformers/utils/import_utils.py

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* Update src/transformers/utils/import_utils.py

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* Update src/transformers/video_utils.py

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
This commit is contained in:
Raushan Turganbay
2025-06-25 10:23:37 +02:00
committed by GitHub
parent 11d0feacce
commit e212ff9e6a
10 changed files with 129 additions and 9 deletions

View File

@@ -27,6 +27,7 @@ from transformers.testing_utils import (
require_cv2,
require_decord,
require_torch,
require_torchcodec,
require_torchvision,
require_vision,
)
@@ -261,6 +262,7 @@ class LoadVideoTester(unittest.TestCase):
@require_decord
@require_torchvision
@require_torchcodec
@require_cv2
def test_load_video_backend_url(self):
video, _ = load_video(
@@ -269,6 +271,12 @@ class LoadVideoTester(unittest.TestCase):
)
self.assertEqual(video.shape, (243, 360, 640, 3))
video, _ = load_video(
"https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/sample_demo_1.mp4",
backend="torchcodec",
)
self.assertEqual(video.shape, (243, 360, 640, 3))
# Can't use certain backends with url
with self.assertRaises(ValueError):
video, _ = load_video(
@@ -283,6 +291,7 @@ class LoadVideoTester(unittest.TestCase):
@require_decord
@require_torchvision
@require_torchcodec
@require_cv2
def test_load_video_backend_local(self):
video_file_path = hf_hub_download(
@@ -300,6 +309,10 @@ class LoadVideoTester(unittest.TestCase):
self.assertEqual(video.shape, (243, 360, 640, 3))
self.assertIsInstance(metadata, VideoMetadata)
video, metadata = load_video(video_file_path, backend="torchcodec")
self.assertEqual(video.shape, (243, 360, 640, 3))
self.assertIsInstance(metadata, VideoMetadata)
def test_load_video_num_frames(self):
video, _ = load_video(
"https://huggingface.co/datasets/raushan-testing-hf/videos-test/resolve/main/sample_demo_1.mp4",