From 30409af6e1b2b5efb6d9932b3e3b4ce20cfdb30e Mon Sep 17 00:00:00 2001 From: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 3 Aug 2023 11:01:10 +0100 Subject: [PATCH] Update InstructBLIP & Align values after rescale update (#25209) * Update InstructBLIP values Note: the tests are not independent. Running the test independently produces different logits compared to running all the integration tests * Update test values after rescale update * Remove left over commented out code * Revert to previous rescaling logic * Update rescale tests --- .../models/efficientnet/image_processing_efficientnet.py | 8 ++++---- src/transformers/models/vivit/image_processing_vivit.py | 8 ++++---- .../efficientnet/test_image_processing_efficientnet.py | 4 ++-- tests/models/instructblip/test_modeling_instructblip.py | 6 +++--- tests/models/vivit/test_image_processing_vivit.py | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 8873a80069..9a19ab1ff6 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -155,10 +155,11 @@ class EfficientNetImageProcessor(BaseImageProcessor): """ Rescale an image by a scale factor. - If offset is True, the image is rescaled between [-1, 1]. - image = image * scale * 2 - 1 + If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is + 1/127.5, the image is rescaled between [-1, 1]. + image = image * scale - 1 - If offset is False, the image is rescaled between [0, 1]. + If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1]. 
image = image * scale Args: @@ -171,7 +172,6 @@ class EfficientNetImageProcessor(BaseImageProcessor): data_format (`str` or `ChannelDimension`, *optional*): The channel dimension format of the image. If not provided, it will be the same as the input image. """ - scale = scale * 2 if offset else scale rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs) if offset: diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 41666e9999..0790c5d82b 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -179,10 +179,11 @@ class VivitImageProcessor(BaseImageProcessor): """ Rescale an image by a scale factor. - If offset is True, the image is rescaled between [-1, 1]. - image = image * scale * 2 - 1 + If `offset` is `True`, the image has its values rescaled by `scale` and then offset by 1. If `scale` is + 1/127.5, the image is rescaled between [-1, 1]. + image = image * scale - 1 - If offset is False, the image is rescaled between [0, 1]. + If `offset` is `False`, and `scale` is 1/255, the image is rescaled between [0, 1]. image = image * scale Args: @@ -195,7 +196,6 @@ class VivitImageProcessor(BaseImageProcessor): data_format (`str` or `ChannelDimension`, *optional*): The channel dimension format of the image. If not provided, it will be the same as the input image. 
""" - scale = scale * 2 if offset else scale rescaled_image = rescale(image, scale=scale, data_format=data_format, **kwargs) if offset: diff --git a/tests/models/efficientnet/test_image_processing_efficientnet.py b/tests/models/efficientnet/test_image_processing_efficientnet.py index 3e427474f6..bc65e7acbf 100644 --- a/tests/models/efficientnet/test_image_processing_efficientnet.py +++ b/tests/models/efficientnet/test_image_processing_efficientnet.py @@ -200,8 +200,8 @@ class EfficientNetImageProcessorTest(ImageProcessingSavingTestMixin, unittest.Te image_processor = self.image_processing_class(**self.image_processor_dict) - rescaled_image = image_processor.rescale(image, scale=1 / 255) - expected_image = (image * (2 / 255.0)).astype(np.float32) - 1 + rescaled_image = image_processor.rescale(image, scale=1 / 127.5) + expected_image = (image * (1 / 127.5)).astype(np.float32) - 1 self.assertTrue(np.allclose(rescaled_image, expected_image)) rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False) diff --git a/tests/models/instructblip/test_modeling_instructblip.py b/tests/models/instructblip/test_modeling_instructblip.py index 49d780918c..f8ce2b22e8 100644 --- a/tests/models/instructblip/test_modeling_instructblip.py +++ b/tests/models/instructblip/test_modeling_instructblip.py @@ -538,7 +538,7 @@ class InstructBlipModelIntegrationTest(unittest.TestCase): logits = model(**inputs).logits expected_slice = torch.tensor( - [[-3.5020, -12.3281, 8.4453], [-5.1406, -11.9609, 7.8711], [-4.0430, -13.4375, 9.1172]], + [[-3.4727, -11.8203, 8.3828], [-5.1172, -11.3438, 7.7656], [-4.0742, -13.4688, 9.1953]], device=torch_device, ) self.assertTrue(torch.allclose(logits[0, :3, :3].float(), expected_slice, atol=1e-3)) @@ -548,12 +548,12 @@ class InstructBlipModelIntegrationTest(unittest.TestCase): generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip() # fmt: off - expected_outputs = [ 2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 
263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 1623, 263, 19587, 4272, 11952, 29889] + expected_outputs = [2, 450, 22910, 9565, 310, 445, 1967, 338, 393, 263, 767, 338, 13977, 292, 22095, 373, 278, 1250, 310, 263, 13328, 20134, 29963, 1550, 19500, 373, 263, 19587, 4272, 11952, 29889] # fmt: on self.assertEqual(outputs[0].tolist(), expected_outputs) self.assertEqual( generated_text, - "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving down a busy city street.", + "The unusual aspect of this image is that a man is ironing clothes on the back of a yellow SUV while driving on a busy city street.", ) def test_inference_flant5_xl(self): diff --git a/tests/models/vivit/test_image_processing_vivit.py b/tests/models/vivit/test_image_processing_vivit.py index 0b445cf474..6954734748 100644 --- a/tests/models/vivit/test_image_processing_vivit.py +++ b/tests/models/vivit/test_image_processing_vivit.py @@ -219,8 +219,8 @@ class VivitImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase image_processor = self.image_processing_class(**self.image_processor_dict) - rescaled_image = image_processor.rescale(image, scale=1 / 255) - expected_image = (image * (2 / 255.0)).astype(np.float32) - 1 + rescaled_image = image_processor.rescale(image, scale=1 / 127.5) + expected_image = (image * (1 / 127.5)).astype(np.float32) - 1 self.assertTrue(np.allclose(rescaled_image, expected_image)) rescaled_image = image_processor.rescale(image, scale=1 / 255, offset=False)