From 7f5088503fb440cb3bb2d610f892e2ee547982b3 Mon Sep 17 00:00:00 2001 From: Name Date: Thu, 17 Oct 2024 17:27:34 +0200 Subject: [PATCH] removes decord (#33987) * removes decord dependency optimize np Revert "optimize" This reverts commit faa136b51ec4ec5858e5b0ae40eb7ef89a88b475. helpers as documentation pydoc missing keys * make fixup * require_av --------- Co-authored-by: ad --- docker/transformers-all-latest-gpu/Dockerfile | 2 +- setup.py | 3 +- src/transformers/__init__.py | 2 - src/transformers/dependency_versions_table.py | 1 - .../models/git/convert_git_to_pytorch.py | 43 ++++++++++++++----- src/transformers/testing_utils.py | 8 ---- src/transformers/utils/__init__.py | 1 - src/transformers/utils/import_utils.py | 10 ----- tests/test_pipeline_mixin.py | 6 +-- 9 files changed, 37 insertions(+), 39 deletions(-) diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index 9c5e3c9141..08e37ea6e1 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum # For video model testing -RUN python3 -m pip install --no-cache-dir decord av==9.2.0 +RUN python3 -m pip install --no-cache-dir av==9.2.0 # Some slow tests require bnb RUN python3 -m pip install --no-cache-dir bitsandbytes diff --git a/setup.py b/setup.py index b1ffd0af63..1846f7bf97 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,6 @@ _deps = [ "cookiecutter==1.7.3", "dataclasses", "datasets!=2.5.0", - "decord==0.6.0", "deepspeed>=0.9.3", "diffusers", "dill<0.3.5", @@ -313,7 +312,7 @@ extras["timm"] = deps_list("timm") extras["torch-vision"] = deps_list("torchvision") + extras["vision"] extras["natten"] = deps_list("natten") extras["codecarbon"] = deps_list("codecarbon") -extras["video"] = deps_list("decord", "av") +extras["video"] = deps_list("av") extras["sentencepiece"] = deps_list("sentencepiece", "protobuf") extras["tiktoken"] = deps_list("tiktoken", "blobfile") diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 236333fb1c..50400ed6c4 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -939,7 +939,6 @@ _import_structure = { "is_av_available", "is_bitsandbytes_available", "is_datasets_available", - "is_decord_available", "is_faiss_available", "is_flax_available", "is_keras_nlp_available", @@ -5855,7 +5854,6 @@ if TYPE_CHECKING: is_av_available, is_bitsandbytes_available, is_datasets_available, - is_decord_available, is_faiss_available, is_flax_available, is_keras_nlp_available, diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 6564e07903..5ce23f4b76 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -11,7 +11,6 @@ deps = { "cookiecutter": "cookiecutter==1.7.3", "dataclasses": "dataclasses", "datasets": "datasets!=2.5.0", - "decord": "decord==0.6.0", "deepspeed": "deepspeed>=0.9.3", "diffusers": "diffusers", "dill": "dill<0.3.5", diff --git a/src/transformers/models/git/convert_git_to_pytorch.py b/src/transformers/models/git/convert_git_to_pytorch.py index 238b8124a0..2f93a6b03a 100644 --- a/src/transformers/models/git/convert_git_to_pytorch.py +++ b/src/transformers/models/git/convert_git_to_pytorch.py @@ -19,6 +19,7 @@ URL: https://github.com/microsoft/GenerativeImage2Text/tree/main""" import argparse from pathlib import Path +import av import numpy as np import requests import torch @@ -193,10 +194,27 @@ def prepare_img(model_name): def prepare_video(): - from decord import VideoReader, cpu + def read_video_pyav(container, indices): + """ + Decode the video with PyAV decoder. - # set seed for reproducability - np.random.seed(0) + Args: + container (`av.container.input.InputContainer`): PyAV container. + indices (`List[int]`): List of frame indices to decode. + + Returns: + result (np.ndarray): np array of decoded frames of shape (num_frames, height, width, 3). + """ + frames = [] + container.seek(0) + start_index = indices[0] + end_index = indices[-1] + for i, frame in enumerate(container.decode(video=0)): + if i > end_index: + break + if i >= start_index and i in indices: + frames.append(frame) + return np.stack([x.to_ndarray(format="rgb24") for x in frames]) def sample_frame_indices(clip_len, frame_sample_rate, seg_len): """ @@ -217,16 +235,19 @@ def prepare_video(): indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64) return indices - # video clip consists of 300 frames (10 seconds at 30 FPS) + # set seed for reproducibility + np.random.seed(0) + file_path = hf_hub_download(repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset") - videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0)) + with av.open(file_path) as container: + # sample 6 frames + num_frames = 6 + indices = sample_frame_indices( + clip_len=num_frames, frame_sample_rate=4, seg_len=container.streams.video[0].frames + ) + frames = read_video_pyav(container, indices) - # sample 6 frames - videoreader.seek(0) - indices = sample_frame_indices(clip_len=6, frame_sample_rate=4, seg_len=len(videoreader)) - video = videoreader.get_batch(indices).asnumpy() - - return video + return frames @torch.no_grad() diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 8eda45bd40..2fc22551d3 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -67,7 +67,6 @@ from .utils import ( is_compressed_tensors_available, is_cv2_available, is_cython_available, - is_decord_available, is_detectron2_available, is_eetq_available, is_essentia_available, @@ -758,13 +757,6 @@ def require_spacy(test_case): return unittest.skipUnless(is_spacy_available(), "test requires spacy")(test_case) -def require_decord(test_case): - """ - Decorator marking a test that requires decord. These tests are skipped when decord isn't installed. - """ - return unittest.skipUnless(is_decord_available(), "test requires decord")(test_case) - - def require_torch_multi_gpu(test_case): """ Decorator marking a test that requires a multi-GPU setup (in PyTorch). These tests are skipped on a machine without diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 3b33127be4..2876eef9ea 100755 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -128,7 +128,6 @@ from .import_utils import ( is_cv2_available, is_cython_available, is_datasets_available, - is_decord_available, is_detectron2_available, is_eetq_available, is_essentia_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index fbc248824a..2f0cfe1d6d 100755 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -112,7 +112,6 @@ _coloredlogs_available = _is_package_available("coloredlogs") # `importlib.metadata.util` doesn't work with `opencv-python-headless`. _cv2_available = importlib.util.find_spec("cv2") is not None _datasets_available = _is_package_available("datasets") -_decord_available = importlib.util.find_spec("decord") is not None _detectron2_available = _is_package_available("detectron2") # We need to check both `faiss` and `faiss-cpu`. _faiss_available = importlib.util.find_spec("faiss") is not None @@ -1173,10 +1172,6 @@ def is_ccl_available(): return _is_ccl_available -def is_decord_available(): - return _decord_available - - def is_sudachi_available(): return _sudachipy_available @@ -1547,10 +1542,6 @@ PRETTY_MIDI_IMPORT_ERROR = """ Please note that you may need to restart your runtime after installation. """ -DECORD_IMPORT_ERROR = """ -{0} requires the decord library but it was not found in your environment. You can install it with pip: `pip install -decord`. Please note that you may need to restart your runtime after installation. -""" CYTHON_IMPORT_ERROR = """ {0} requires the Cython library but it was not found in your environment. You can install it with pip: `pip install @@ -1612,7 +1603,6 @@ BACKENDS_MAPPING = OrderedDict( ("scipy", (is_scipy_available, SCIPY_IMPORT_ERROR)), ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)), ("oneccl_bind_pt", (is_ccl_available, CCL_IMPORT_ERROR)), - ("decord", (is_decord_available, DECORD_IMPORT_ERROR)), ("cython", (is_cython_available, CYTHON_IMPORT_ERROR)), ("jieba", (is_jieba_available, JIEBA_IMPORT_ERROR)), ("peft", (is_peft_available, PEFT_IMPORT_ERROR)), diff --git a/tests/test_pipeline_mixin.py b/tests/test_pipeline_mixin.py index cae285f5f1..74bc1b8669 100644 --- a/tests/test_pipeline_mixin.py +++ b/tests/test_pipeline_mixin.py @@ -51,7 +51,7 @@ from transformers.pipelines import ( ) from transformers.testing_utils import ( is_pipeline_test, - require_decord, + require_av, require_pytesseract, require_timm, require_torch, @@ -722,14 +722,14 @@ class PipelineTesterMixin: @is_pipeline_test @require_torch_or_tf @require_vision - @require_decord + @require_av def test_pipeline_video_classification(self): self.run_task_tests(task="video-classification") @is_pipeline_test @require_vision - @require_decord @require_torch + @require_av def test_pipeline_video_classification_fp16(self): self.run_task_tests(task="video-classification", torch_dtype="float16")