Update defaults and logic to match old FE (#20065)
* Update defaults and logic to match old FE * Use docker run rest values
This commit is contained in:
@@ -361,7 +361,6 @@ class LayoutLMv3ImageProcessor(BaseImageProcessor):
|
||||
images = [self.normalize(image=image, mean=image_mean, std=image_std) for image in images]
|
||||
|
||||
# flip color channels from RGB to BGR (as Detectron2 requires this)
|
||||
images = [flip_channel_order(image) for image in images]
|
||||
images = [to_channel_dimension_format(image, data_format) for image in images]
|
||||
|
||||
data = BatchFeature(data={"pixel_values": images}, tensor_type=return_tensors)
|
||||
|
||||
@@ -24,8 +24,8 @@ from transformers.utils.generic import TensorType
|
||||
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
|
||||
from ...image_transforms import center_crop, normalize, rescale, resize, to_channel_dimension_format
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
IMAGENET_STANDARD_STD,
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -61,7 +61,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
parameter in the `preprocess` method.
|
||||
size (`Dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
|
||||
Size of the image after resizing. Can be overridden by the `size` parameter in the `preprocess` method.
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
|
||||
Defines the resampling filter to use if resizing the image. Can be overridden by the `resample` parameter
|
||||
in the `preprocess` method.
|
||||
do_rescale (`bool`, *optional*, defaults to `True`):
|
||||
@@ -89,7 +89,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
crop_size: Dict[str, int] = None,
|
||||
do_resize: bool = True,
|
||||
size: Dict[str, int] = None,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
resample: PILImageResampling = PILImageResampling.BICUBIC,
|
||||
do_rescale: bool = True,
|
||||
rescale_factor: Union[int, float] = 1 / 255,
|
||||
do_normalize: bool = True,
|
||||
@@ -111,8 +111,8 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
|
||||
def center_crop(
|
||||
self,
|
||||
@@ -153,7 +153,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
image: np.ndarray,
|
||||
size: Dict[str, int],
|
||||
resample: PILImageResampling = PIL.Image.BILINEAR,
|
||||
resample: PILImageResampling = PIL.Image.BICUBIC,
|
||||
data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs
|
||||
) -> np.ndarray:
|
||||
@@ -165,7 +165,7 @@ class PerceiverImageProcessor(BaseImageProcessor):
|
||||
Image to resize.
|
||||
size (`Dict[str, int]`):
|
||||
Size of the output image.
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
Resampling filter to use when resizing the image.
|
||||
data_format (`str` or `ChannelDimension`, *optional*):
|
||||
The channel dimension format of the image. If not provided, it will be the same as the input image.
|
||||
|
||||
@@ -31,8 +31,8 @@ from ...image_transforms import (
|
||||
to_channel_dimension_format,
|
||||
)
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
IMAGENET_STANDARD_STD,
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -133,8 +133,8 @@ class PoolFormerImageProcessor(BaseImageProcessor):
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
|
||||
def resize(
|
||||
self,
|
||||
|
||||
@@ -25,8 +25,8 @@ from transformers.utils.generic import TensorType
|
||||
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
|
||||
from ...image_transforms import center_crop, normalize, rescale, resize, to_channel_dimension_format
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
IMAGENET_STANDARD_STD,
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
IMAGENET_DEFAULT_STD,
|
||||
ChannelDimension,
|
||||
ImageInput,
|
||||
PILImageResampling,
|
||||
@@ -115,15 +115,15 @@ class SegformerImageProcessor(BaseImageProcessor):
|
||||
self.do_rescale = do_rescale
|
||||
self.rescale_factor = rescale_factor
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
|
||||
self.do_reduce_labels = do_reduce_labels
|
||||
|
||||
def resize(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
size: Dict[str, int],
|
||||
resample: PILImageResampling = PILImageResampling.BICUBIC,
|
||||
resample: PILImageResampling = PILImageResampling.BILINEAR,
|
||||
data_format: Optional[Union[str, ChannelDimension]] = None,
|
||||
**kwargs
|
||||
) -> np.ndarray:
|
||||
@@ -135,7 +135,7 @@ class SegformerImageProcessor(BaseImageProcessor):
|
||||
Image to resize.
|
||||
size (`Dict[str, int]`):
|
||||
Size of the output image.
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
resample (`PILImageResampling`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
Resampling filter to use when resizing the image.
|
||||
data_format (`str` or `ChannelDimension`, *optional*):
|
||||
The channel dimension format of the image. If not provided, it will be the same as the input image.
|
||||
|
||||
@@ -903,7 +903,7 @@ class PerceiverModelIntegrationTest(unittest.TestCase):
|
||||
expected_shape = torch.Size((1, model.config.num_labels))
|
||||
self.assertEqual(logits.shape, expected_shape)
|
||||
|
||||
expected_slice = torch.tensor([-1.1653, -0.1993, -0.7521], device=torch_device)
|
||||
expected_slice = torch.tensor([-1.1652, -0.1992, -0.7520], device=torch_device)
|
||||
|
||||
self.assertTrue(torch.allclose(logits[0, :3], expected_slice, atol=1e-4))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user