committed by
GitHub
parent
8dd0a2b89c
commit
41b9b92b52
@@ -379,17 +379,18 @@ class Qwen2VLImageProcessor(BaseImageProcessor):
|
|||||||
- `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
|
- `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
min_pixels = min_pixels if min_pixels is not None else self.min_pixels
|
||||||
|
max_pixels = max_pixels if max_pixels is not None else self.max_pixels
|
||||||
|
|
||||||
if size is not None:
|
if size is not None:
|
||||||
if "shortest_edge" not in size or "longest_edge" not in size:
|
if "shortest_edge" not in size or "longest_edge" not in size:
|
||||||
raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
|
raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
|
||||||
min_pixels = size["shortest_edge"]
|
min_pixels = size["shortest_edge"]
|
||||||
|
elif min_pixels is not None and max_pixels is not None:
|
||||||
|
# backward compatibility: override size with min_pixels and max_pixels if they are provided
|
||||||
|
size = {"shortest_edge": min_pixels, "longest_edge": max_pixels}
|
||||||
else:
|
else:
|
||||||
size = {**self.size}
|
size = {**self.size}
|
||||||
# backward compatibility: override size with min_pixels and max_pixels if they are provided
|
|
||||||
if min_pixels is not None:
|
|
||||||
size["shortest_edge"] = min_pixels
|
|
||||||
if max_pixels is not None:
|
|
||||||
size["longest_edge"] = max_pixels
|
|
||||||
|
|
||||||
do_resize = do_resize if do_resize is not None else self.do_resize
|
do_resize = do_resize if do_resize is not None else self.do_resize
|
||||||
|
|
||||||
|
|||||||
@@ -334,17 +334,18 @@ class Qwen2VLImageProcessorFast(BaseImageProcessorFast):
|
|||||||
device (`torch.device`, *optional*):
|
device (`torch.device`, *optional*):
|
||||||
The device to process the images on. If unset, the device is inferred from the input images.
|
The device to process the images on. If unset, the device is inferred from the input images.
|
||||||
"""
|
"""
|
||||||
|
min_pixels = min_pixels if min_pixels is not None else self.min_pixels
|
||||||
|
max_pixels = max_pixels if max_pixels is not None else self.max_pixels
|
||||||
|
|
||||||
if size is not None:
|
if size is not None:
|
||||||
if "shortest_edge" not in size or "longest_edge" not in size:
|
if "shortest_edge" not in size or "longest_edge" not in size:
|
||||||
raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
|
raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
|
||||||
min_pixels = size["shortest_edge"]
|
min_pixels = size["shortest_edge"]
|
||||||
|
elif min_pixels is not None and max_pixels is not None:
|
||||||
|
# backward compatibility: override size with min_pixels and max_pixels if they are provided
|
||||||
|
size = {"shortest_edge": min_pixels, "longest_edge": max_pixels}
|
||||||
else:
|
else:
|
||||||
size = {**self.size}
|
size = {**self.size}
|
||||||
# backward compatibility: override size with min_pixels and max_pixels if they are provided
|
|
||||||
if min_pixels is not None:
|
|
||||||
size["shortest_edge"] = min_pixels
|
|
||||||
if max_pixels is not None:
|
|
||||||
size["longest_edge"] = max_pixels
|
|
||||||
|
|
||||||
do_resize = do_resize if do_resize is not None else self.do_resize
|
do_resize = do_resize if do_resize is not None else self.do_resize
|
||||||
size = size if size is not None else self.size
|
size = size if size is not None else self.size
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -298,6 +299,20 @@ class Qwen2VLImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
|||||||
expected_output_video_shape = (171500, 1176)
|
expected_output_video_shape = (171500, 1176)
|
||||||
self.assertEqual(tuple(encoded_video.shape), expected_output_video_shape)
|
self.assertEqual(tuple(encoded_video.shape), expected_output_video_shape)
|
||||||
|
|
||||||
|
def test_custom_image_size(self):
    """Check that pixel-budget overrides passed to `from_pretrained` are applied.

    Each processor class is saved to a temporary directory and reloaded with
    explicit `max_pixels` / `min_pixels` keyword arguments; the reloaded
    processor is then run on the tester's equal-resolution inputs and the
    shape of the resulting `pixel_values` is compared to a fixed expectation.
    """
    # Same expected shape for every processor class under test.
    expected_shape = [112, 1176]

    for processor_cls in self.image_processor_list:
        original_processor = processor_cls(**self.image_processor_dict)

        # Round-trip through disk so the overrides go through `from_pretrained`.
        with tempfile.TemporaryDirectory() as save_dir:
            original_processor.save_pretrained(save_dir)
            reloaded_processor = processor_cls.from_pretrained(
                save_dir, max_pixels=56 * 56, min_pixels=28 * 28
            )

        images = self.image_processor_tester.prepare_image_inputs(equal_resolution=True)
        encoded = reloaded_processor(images, return_tensors="pt")
        actual_shape = list(encoded.pixel_values.shape)
        self.assertListEqual(actual_shape, expected_shape)
|
||||||
|
|
||||||
@require_vision
|
@require_vision
|
||||||
@require_torch
|
@require_torch
|
||||||
def test_slow_fast_equivalence(self):
|
def test_slow_fast_equivalence(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user