Add test_image_processing_common.py (#20785)

* Add test_image_processing_common.py * Fix typo * Update imports and test fetcher * Revert but keep test fetcher update * Fix imports * Fix all imports * Formatting fix * Update tests/test_image_processing_common.py
2023-01-23 13:48:30 +00:00
parent 96b2b2de12
commit 66459ce319
30 changed files with 367 additions and 168 deletions
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -25,16 +25,7 @@ from pathlib import Path
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import (
-    TOKEN,
-    USER,
-    check_json_file_has_correct_format,
-    get_tests_dir,
-    is_staging_test,
-    require_torch,
-    require_vision,
-)
-from transformers.utils import is_torch_available, is_vision_available
+from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test


 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -42,105 +33,9 @@ sys.path.append(str(Path(__file__).parent.parent / "utils"))
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402


-if is_torch_available():
-    import numpy as np
-    import torch
-
-if is_vision_available():
-    from PIL import Image
-
-
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")


-def prepare_image_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-    or a list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the images are of the same resolution or not.
-    """
-
-    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
-
-    image_inputs = []
-    for i in range(feature_extract_tester.batch_size):
-        if equal_resolution:
-            width = height = feature_extract_tester.max_resolution
-        else:
-            # To avoid getting image width/height 0
-            min_resolution = feature_extract_tester.min_resolution
-            if getattr(feature_extract_tester, "size_divisor", None):
-                # If `size_divisor` is defined, the image needs to have width/size >= `size_divisor`
-                min_resolution = max(feature_extract_tester.size_divisor, min_resolution)
-            width, height = np.random.choice(np.arange(min_resolution, feature_extract_tester.max_resolution), 2)
-        image_inputs.append(
-            np.random.randint(
-                255,
-                size=(
-                    feature_extract_tester.num_channels,
-                    width,
-                    height,
-                ),
-                dtype=np.uint8,
-            )
-        )
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        image_inputs = [Image.fromarray(np.moveaxis(image, 0, -1)) for image in image_inputs]
-
-    if torchify:
-        image_inputs = [torch.from_numpy(image) for image in image_inputs]
-
-    return image_inputs
-
-
-def prepare_video(feature_extract_tester, width=10, height=10, numpify=False, torchify=False):
-    """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""
-
-    video = []
-    for i in range(feature_extract_tester.num_frames):
-        video.append(np.random.randint(255, size=(feature_extract_tester.num_channels, width, height), dtype=np.uint8))
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]
-
-    if torchify:
-        video = [torch.from_numpy(frame) for frame in video]
-
-    return video
-
-
-def prepare_video_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
-    one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the videos are of the same resolution or not.
-    """
-
-    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
-
-    video_inputs = []
-    for i in range(feature_extract_tester.batch_size):
-        if equal_resolution:
-            width = height = feature_extract_tester.max_resolution
-        else:
-            width, height = np.random.choice(
-                np.arange(feature_extract_tester.min_resolution, feature_extract_tester.max_resolution), 2
-            )
-            video = prepare_video(
-                feature_extract_tester=feature_extract_tester,
-                width=width,
-                height=height,
-                numpify=numpify,
-                torchify=torchify,
-            )
-        video_inputs.append(video)
-
-    return video_inputs
-
-
 class FeatureExtractionSavingTestMixin:
    test_cast_dtype = None

@@ -174,41 +69,6 @@ class FeatureExtractionSavingTestMixin:
        feat_extract = self.feature_extraction_class()
        self.assertIsNotNone(feat_extract)

-    @require_torch
-    @require_vision
-    def test_cast_dtype_device(self):
-        if self.test_cast_dtype is not None:
-            # Initialize feature_extractor
-            feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
-
-            # create random PyTorch tensors
-            image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt")
-            # for layoutLM compatiblity
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float32)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt").to(torch.float16)
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt").to("cpu", torch.bfloat16)
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.bfloat16)
-
-            with self.assertRaises(TypeError):
-                _ = feature_extractor(image_inputs, return_tensors="pt").to(torch.bfloat16, "cpu")
-
-            # Try with text + image feature
-            encoding = feature_extractor(image_inputs, return_tensors="pt")
-            encoding.update({"input_ids": torch.LongTensor([[1, 2, 3], [4, 5, 6]])})
-            encoding = encoding.to(torch.float16)
-
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
-            self.assertEqual(encoding.input_ids.dtype, torch.long)
-

 class FeatureExtractorUtilTester(unittest.TestCase):
    def test_cached_files_are_used_when_internet_is_down(self):