From a1ad9197c5756858e9014a0e01fe5fb1791efdf2 Mon Sep 17 00:00:00 2001
From: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
Date: Sat, 12 Jul 2025 19:39:06 -0400
Subject: [PATCH] Fix overriding Fast Image/Video Processors instance
 attributes affect other instances (#39363)

* fix and add tests

* nit
---
 .../image_processing_utils_fast.py            |  3 +-
 src/transformers/video_processing_utils.py    |  6 +--
 tests/test_image_processing_common.py         | 39 ++++++++++++++++++-
 tests/test_video_processing_common.py         | 38 ++++++++++++++++++
 4 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py
index cb02ed2874..4f7865b2c9 100644
--- a/src/transformers/image_processing_utils_fast.py
+++ b/src/transformers/image_processing_utils_fast.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from collections.abc import Iterable
+from copy import deepcopy
 from functools import lru_cache, partial
 from typing import Any, Optional, TypedDict, Union
 
@@ -229,7 +230,7 @@ class BaseImageProcessorFast(BaseImageProcessor):
             if kwarg is not None:
                 setattr(self, key, kwarg)
             else:
-                setattr(self, key, getattr(self, key, None))
+                setattr(self, key, deepcopy(getattr(self, key, None)))
 
         # get valid kwargs names
         self._valid_kwargs_names = list(self.valid_kwargs.__annotations__.keys())
diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py
index 6caa2bfb65..715912846c 100644
--- a/src/transformers/video_processing_utils.py
+++ b/src/transformers/video_processing_utils.py
@@ -13,10 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
 import json
 import os
 import warnings
+from copy import deepcopy
 from typing import Any, Optional, Union
 
 import numpy as np
@@ -202,7 +202,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             if kwargs.get(key) is not None:
                 setattr(self, key, kwargs[key])
             else:
-                setattr(self, key, getattr(self, key, None))
+                setattr(self, key, deepcopy(getattr(self, key, None)))
 
     def __call__(self, videos, **kwargs) -> BatchFeature:
         return self.preprocess(videos, **kwargs)
@@ -774,7 +774,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
         Returns:
             `dict[str, Any]`: Dictionary of all the attributes that make up this video processor instance.
         """
-        output = copy.deepcopy(self.__dict__)
+        output = deepcopy(self.__dict__)
         output.pop("model_valid_processing_keys", None)
         output.pop("_valid_kwargs_names", None)
         output["video_processor_type"] = self.__class__.__name__
diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
index 623414974f..8f159045e6 100644
--- a/tests/test_image_processing_common.py
+++ b/tests/test_image_processing_common.py
@@ -19,6 +19,7 @@ import pathlib
 import tempfile
 import time
 import warnings
+from copy import deepcopy
 
 import numpy as np
 import requests
@@ -559,6 +560,43 @@ class ImageProcessingTestMixin:
         if not is_tested:
             self.skipTest(reason="No validation found for `preprocess` method")
 
+    def test_override_instance_attributes_does_not_affect_other_instances(self):
+        if self.fast_image_processing_class is None:
+            self.skipTest(
+                "Only testing fast image processor, as most slow processors break this test and are to be deprecated"
+            )
+
+        image_processing_class = self.fast_image_processing_class
+        image_processor_1 = image_processing_class()
+        image_processor_2 = image_processing_class()
+        if not (hasattr(image_processor_1, "size") and isinstance(image_processor_1.size, dict)) or not (
+            hasattr(image_processor_1, "image_mean") and isinstance(image_processor_1.image_mean, list)
+        ):
+            self.skipTest(
+                reason="Skipping test as the image processor does not have dict size or list image_mean attributes"
+            )
+
+        original_size_2 = deepcopy(image_processor_2.size)
+        for key in image_processor_1.size:
+            image_processor_1.size[key] = -1
+        modified_copied_size_1 = deepcopy(image_processor_1.size)
+
+        original_image_mean_2 = deepcopy(image_processor_2.image_mean)
+        image_processor_1.image_mean[0] = -1
+        modified_copied_image_mean_1 = deepcopy(image_processor_1.image_mean)
+
+        # check that the original attributes of the second instance are not affected
+        self.assertEqual(image_processor_2.size, original_size_2)
+        self.assertEqual(image_processor_2.image_mean, original_image_mean_2)
+
+        for key in image_processor_2.size:
+            image_processor_2.size[key] = -2
+        image_processor_2.image_mean[0] = -2
+
+        # check that the modified attributes of the first instance are not affected by the second instance
+        self.assertEqual(image_processor_1.size, modified_copied_size_1)
+        self.assertEqual(image_processor_1.image_mean, modified_copied_image_mean_1)
+
     @slow
     @require_torch_accelerator
     @require_vision
@@ -575,7 +613,6 @@ class ImageProcessingTestMixin:
 
         image_processor = torch.compile(image_processor, mode="reduce-overhead")
         output_compiled = image_processor(input_image, device=torch_device, return_tensors="pt")
-        print(output_eager.pixel_values.dtype, output_compiled.pixel_values.dtype)
         self._assert_slow_fast_tensors_equivalence(
             output_eager.pixel_values, output_compiled.pixel_values, atol=1e-4, rtol=1e-4, mean_atol=1e-5
         )
diff --git a/tests/test_video_processing_common.py b/tests/test_video_processing_common.py
index ec6e059f91..8507108163 100644
--- a/tests/test_video_processing_common.py
+++ b/tests/test_video_processing_common.py
@@ -18,6 +18,7 @@ import json
 import os
 import tempfile
 import warnings
+from copy import deepcopy
 
 import numpy as np
 from packaging import version
@@ -448,3 +449,40 @@ class VideoProcessingTestMixin:
 
         if not is_tested:
             self.skipTest(reason="No validation found for `preprocess` method")
+
+    def test_override_instance_attributes_does_not_affect_other_instances(self):
+        if self.fast_video_processing_class is None:
+            self.skipTest(
+                "Only testing fast video processor, as most slow processors break this test and are to be deprecated"
+            )
+
+        video_processing_class = self.fast_video_processing_class
+        video_processor_1 = video_processing_class()
+        video_processor_2 = video_processing_class()
+        if not (hasattr(video_processor_1, "size") and isinstance(video_processor_1.size, dict)) or not (
+            hasattr(video_processor_1, "image_mean") and isinstance(video_processor_1.image_mean, list)
+        ):
+            self.skipTest(
+                reason="Skipping test as the image processor does not have dict size or list image_mean attributes"
+            )
+
+        original_size_2 = deepcopy(video_processor_2.size)
+        for key in video_processor_1.size:
+            video_processor_1.size[key] = -1
+        modified_copied_size_1 = deepcopy(video_processor_1.size)
+
+        original_image_mean_2 = deepcopy(video_processor_2.image_mean)
+        video_processor_1.image_mean[0] = -1
+        modified_copied_image_mean_1 = deepcopy(video_processor_1.image_mean)
+
+        # check that the original attributes of the second instance are not affected
+        self.assertEqual(video_processor_2.size, original_size_2)
+        self.assertEqual(video_processor_2.image_mean, original_image_mean_2)
+
+        for key in video_processor_2.size:
+            video_processor_2.size[key] = -2
+        video_processor_2.image_mean[0] = -2
+
+        # check that the modified attributes of the first instance are not affected by the second instance
+        self.assertEqual(video_processor_1.size, modified_copied_size_1)
+        self.assertEqual(video_processor_1.image_mean, modified_copied_image_mean_1)