removes decord (#33987)

* removes decord dependency optimize np Revert "optimize" This reverts commit faa136b51ec4ec5858e5b0ae40eb7ef89a88b475. helpers as documentation pydoc missing keys
* make fixup
* require_av

---------

Co-authored-by: ad <hi@arnaudiaz.com>
2024-10-17 17:27:34 +02:00
parent f2846ad2b7
commit 7f5088503f
9 changed files with 37 additions and 39 deletions
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum

 # For video model testing
-RUN python3 -m pip install --no-cache-dir decord av==9.2.0
+RUN python3 -m pip install --no-cache-dir av==9.2.0

 # Some slow tests require bnb
 RUN python3 -m pip install --no-cache-dir bitsandbytes
--- a/setup.py
+++ b/setup.py
@@ -104,7 +104,6 @@ _deps = [
    "cookiecutter==1.7.3",
    "dataclasses",
    "datasets!=2.5.0",
-    "decord==0.6.0",
    "deepspeed>=0.9.3",
    "diffusers",
    "dill<0.3.5",
@@ -313,7 +312,7 @@ extras["timm"] = deps_list("timm")
 extras["torch-vision"] = deps_list("torchvision") + extras["vision"]
 extras["natten"] = deps_list("natten")
 extras["codecarbon"] = deps_list("codecarbon")
-extras["video"] = deps_list("decord", "av")
+extras["video"] = deps_list("av")

 extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
 extras["tiktoken"] = deps_list("tiktoken", "blobfile")
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -939,7 +939,6 @@ _import_structure = {
        "is_av_available",
        "is_bitsandbytes_available",
        "is_datasets_available",
-        "is_decord_available",
        "is_faiss_available",
        "is_flax_available",
        "is_keras_nlp_available",
@@ -5855,7 +5854,6 @@ if TYPE_CHECKING:
        is_av_available,
        is_bitsandbytes_available,
        is_datasets_available,
-        is_decord_available,
        is_faiss_available,
        is_flax_available,
        is_keras_nlp_available,
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -11,7 +11,6 @@ deps = {
    "cookiecutter": "cookiecutter==1.7.3",
    "dataclasses": "dataclasses",
    "datasets": "datasets!=2.5.0",
-    "decord": "decord==0.6.0",
    "deepspeed": "deepspeed>=0.9.3",
    "diffusers": "diffusers",
    "dill": "dill<0.3.5",
--- a/src/transformers/models/git/convert_git_to_pytorch.py
+++ b/src/transformers/models/git/convert_git_to_pytorch.py
@@ -19,6 +19,7 @@ URL: https://github.com/microsoft/GenerativeImage2Text/tree/main"""
 import argparse
 from pathlib import Path

+import av
 import numpy as np
 import requests
 import torch
@@ -193,10 +194,27 @@ def prepare_img(model_name):


 def prepare_video():
-    from decord import VideoReader, cpu
+    def read_video_pyav(container, indices):
+        """
+        Decode the video with PyAV decoder.

-    # set seed for reproducability
-    np.random.seed(0)
+        Args:
+            container (`av.container.input.InputContainer`): PyAV container.
+            indices (`List[int]`): List of frame indices to decode.
+
+        Returns:
+            result (np.ndarray): np array of decoded frames of shape (num_frames, height, width, 3).
+        """
+        frames = []
+        container.seek(0)
+        start_index = indices[0]
+        end_index = indices[-1]
+        for i, frame in enumerate(container.decode(video=0)):
+            if i > end_index:
+                break
+            if i >= start_index and i in indices:
+                frames.append(frame)
+        return np.stack([x.to_ndarray(format="rgb24") for x in frames])

    def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
        """
@@ -217,16 +235,19 @@ def prepare_video():
        indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
        return indices

-    # video clip consists of 300 frames (10 seconds at 30 FPS)
+    # set seed for reproducibility
+    np.random.seed(0)
+
    file_path = hf_hub_download(repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset")
-    videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+    with av.open(file_path) as container:
+        # sample 6 frames
+        num_frames = 6
+        indices = sample_frame_indices(
+            clip_len=num_frames, frame_sample_rate=4, seg_len=container.streams.video[0].frames
+        )
+        frames = read_video_pyav(container, indices)

-    # sample 6 frames
-    videoreader.seek(0)
-    indices = sample_frame_indices(clip_len=6, frame_sample_rate=4, seg_len=len(videoreader))
-    video = videoreader.get_batch(indices).asnumpy()
-
-    return video
+        return frames


@torch.no_grad()
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -67,7 +67,6 @@ from .utils import (
    is_compressed_tensors_available,
    is_cv2_available,
    is_cython_available,
-    is_decord_available,
    is_detectron2_available,
    is_eetq_available,
    is_essentia_available,
@@ -758,13 +757,6 @@ def require_spacy(test_case):
    return unittest.skipUnless(is_spacy_available(), "test requires spacy")(test_case)


-def require_decord(test_case):
-    """
-    Decorator marking a test that requires decord. These tests are skipped when decord isn't installed.
-    """
-    return unittest.skipUnless(is_decord_available(), "test requires decord")(test_case)
-
-
 def require_torch_multi_gpu(test_case):
    """
    Decorator marking a test that requires a multi-GPU setup (in PyTorch). These tests are skipped on a machine without
--- a/src/transformers/utils/__init__.py
+++ b/src/transformers/utils/__init__.py
@@ -128,7 +128,6 @@ from .import_utils import (
    is_cv2_available,
    is_cython_available,
    is_datasets_available,
-    is_decord_available,
    is_detectron2_available,
    is_eetq_available,
    is_essentia_available,
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -112,7 +112,6 @@ _coloredlogs_available = _is_package_available("coloredlogs")
 # `importlib.metadata.util` doesn't work with `opencv-python-headless`.
 _cv2_available = importlib.util.find_spec("cv2") is not None
 _datasets_available = _is_package_available("datasets")
-_decord_available = importlib.util.find_spec("decord") is not None
 _detectron2_available = _is_package_available("detectron2")
 # We need to check both `faiss` and `faiss-cpu`.
 _faiss_available = importlib.util.find_spec("faiss") is not None
@@ -1173,10 +1172,6 @@ def is_ccl_available():
    return _is_ccl_available


-def is_decord_available():
-    return _decord_available
-
-
 def is_sudachi_available():
    return _sudachipy_available

@@ -1547,10 +1542,6 @@ PRETTY_MIDI_IMPORT_ERROR = """
 Please note that you may need to restart your runtime after installation.
 """

-DECORD_IMPORT_ERROR = """
-{0} requires the decord library but it was not found in your environment. You can install it with pip: `pip install
-decord`. Please note that you may need to restart your runtime after installation.
-"""

 CYTHON_IMPORT_ERROR = """
 {0} requires the Cython library but it was not found in your environment. You can install it with pip: `pip install
@@ -1612,7 +1603,6 @@ BACKENDS_MAPPING = OrderedDict(
        ("scipy", (is_scipy_available, SCIPY_IMPORT_ERROR)),
        ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
        ("oneccl_bind_pt", (is_ccl_available, CCL_IMPORT_ERROR)),
-        ("decord", (is_decord_available, DECORD_IMPORT_ERROR)),
        ("cython", (is_cython_available, CYTHON_IMPORT_ERROR)),
        ("jieba", (is_jieba_available, JIEBA_IMPORT_ERROR)),
        ("peft", (is_peft_available, PEFT_IMPORT_ERROR)),
--- a/tests/test_pipeline_mixin.py
+++ b/tests/test_pipeline_mixin.py
@@ -51,7 +51,7 @@ from transformers.pipelines import (
 )
 from transformers.testing_utils import (
    is_pipeline_test,
-    require_decord,
+    require_av,
    require_pytesseract,
    require_timm,
    require_torch,
@@ -722,14 +722,14 @@ class PipelineTesterMixin:
    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
-    @require_decord
+    @require_av
    def test_pipeline_video_classification(self):
        self.run_task_tests(task="video-classification")

    @is_pipeline_test
    @require_vision
-    @require_decord
    @require_torch
+    @require_av
    def test_pipeline_video_classification_fp16(self):
        self.run_task_tests(task="video-classification", torch_dtype="float16")