From d68c46026b4d0a8a6406439250ac51c334ae132b Mon Sep 17 00:00:00 2001 From: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Date: Fri, 4 Nov 2022 19:14:56 +0000 Subject: [PATCH] Update defaults and logic to match old FE (#20065) * Update defaults and logic to match old FE * Use docker run rest values --- .../layoutlmv3/image_processing_layoutlmv3.py | 1 - .../perceiver/image_processing_perceiver.py | 16 ++++++++-------- .../poolformer/image_processing_poolformer.py | 8 ++++---- .../segformer/image_processing_segformer.py | 12 ++++++------ .../models/perceiver/test_modeling_perceiver.py | 2 +- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index 75dd1bdf89..2c74d8ed9b 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -361,7 +361,6 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor): images = [self.normalize(image=image, mean=image_mean, std=image_std) for image in images] # flip color channels from RGB to BGR (as Detectron2 requires this) - images = [flip_channel_order(image) for image in images] images = [to_channel_dimension_format(image, data_format) for image in images] data = BatchFeature(data={"pixel_values": images}, tensor_type=return_tensors) diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index a6a7b6c769..aa916a882d 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -24,8 +24,8 @@ from transformers.utils.generic import TensorType from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict from ...image_transforms import center_crop, normalize, rescale, resize, to_channel_dimension_format from ...image_utils import ( - IMAGENET_STANDARD_MEAN, - IMAGENET_STANDARD_STD, + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, ChannelDimension, ImageInput, PILImageResampling, @@ -61,7 +61,7 @@ class PerceiverImageProcessor(BaseImageProcessor): parameter in the `preprocess` method. size (`Dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`): Size of the image after resizing. Can be overridden by the `size` parameter in the `preprocess` method. - resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`): + resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`): Defines the resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the `preprocess` method. do_rescale (`bool`, *optional*, defaults to `True`): @@ -89,7 +89,7 @@ class PerceiverImageProcessor(BaseImageProcessor): crop_size: Dict[str, int] = None, do_resize: bool = True, size: Dict[str, int] = None, - resample: PILImageResampling = PILImageResampling.BILINEAR, + resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: Union[int, float] = 1 / 255, do_normalize: bool = True, @@ -111,8 +111,8 @@ class PerceiverImageProcessor(BaseImageProcessor): self.do_rescale = do_rescale self.rescale_factor = rescale_factor self.do_normalize = do_normalize - self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN - self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN + self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD def center_crop( self, @@ -153,7 +153,7 @@ class PerceiverImageProcessor(BaseImageProcessor): self, image: np.ndarray, size: Dict[str, int], - resample: PILImageResampling = PIL.Image.BILINEAR, + resample: PILImageResampling = PIL.Image.BICUBIC, data_format: Optional[Union[str, ChannelDimension]] = None, **kwargs ) -> np.ndarray: @@ -165,7 +165,7 @@ class PerceiverImageProcessor(BaseImageProcessor): Image to resize. size (`Dict[str, int]`): Size of the output image. - resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BILINEAR`): + resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BICUBIC`): Resampling filter to use when resizing the image. data_format (`str` or `ChannelDimension`, *optional*): The channel dimension format of the image. If not provided, it will be the same as the input image. diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index 1cb62b44ea..330f436ce2 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -31,8 +31,8 @@ from ...image_transforms import ( to_channel_dimension_format, ) from ...image_utils import ( - IMAGENET_STANDARD_MEAN, - IMAGENET_STANDARD_STD, + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, ChannelDimension, ImageInput, PILImageResampling, @@ -133,8 +133,8 @@ class PoolFormerImageProcessor(BaseImageProcessor): self.do_rescale = do_rescale self.rescale_factor = rescale_factor self.do_normalize = do_normalize - self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN - self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN + self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD def resize( self, diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index a8abc7428a..72d5c9f124 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -25,8 +25,8 @@ from transformers.utils.generic import TensorType from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict from ...image_transforms import center_crop, normalize, rescale, resize, to_channel_dimension_format from ...image_utils import ( - IMAGENET_STANDARD_MEAN, - IMAGENET_STANDARD_STD, + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, ChannelDimension, ImageInput, PILImageResampling, @@ -115,15 +115,15 @@ class SegformerImageProcessor(BaseImageProcessor): self.do_rescale = do_rescale self.rescale_factor = rescale_factor self.do_normalize = do_normalize - self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN - self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD + self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN + self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.do_reduce_labels = do_reduce_labels def resize( self, image: np.ndarray, size: Dict[str, int], - resample: PILImageResampling = PILImageResampling.BICUBIC, + resample: PILImageResampling = PILImageResampling.BILINEAR, data_format: Optional[Union[str, ChannelDimension]] = None, **kwargs ) -> np.ndarray: @@ -135,7 +135,7 @@ class SegformerImageProcessor(BaseImageProcessor): Image to resize. size (`Dict[str, int]`): Size of the output image. - resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BICUBIC`): + resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BILINEAR`): Resampling filter to use when resiizing the image. data_format (`str` or `ChannelDimension`, *optional*): The channel dimension format of the image. If not provided, it will be the same as the input image. diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index 5947a73a0e..5f69b9ff69 100644 --- a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -903,7 +903,7 @@ class PerceiverModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, model.config.num_labels)) self.assertEqual(logits.shape, expected_shape) - expected_slice = torch.tensor([-1.1653, -0.1993, -0.7521], device=torch_device) + expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device) self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4))