Clean up deprecation warnings (#19654)
* Clean up deprecation warnings

  Notes: Changed some strings in tests to raw strings, which changes the literal content of those strings as they are fed into whatever machine handles them. Test cases for `past` in the `past`/`past_key_values` switch were changed or removed because of the warning of its impending removal.

* Add PILImageResampling abstraction for PIL.Image.Resampling
This commit is contained in:
@@ -23,6 +23,7 @@ import torch
|
||||
from PIL import Image
|
||||
from torch import nn
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from utils import img_tensorize
|
||||
|
||||
|
||||
@@ -59,7 +60,7 @@ class ResizeShortestEdge:
|
||||
|
||||
if img.dtype == np.uint8:
|
||||
pil_image = Image.fromarray(img)
|
||||
pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
|
||||
pil_image = pil_image.resize((neww, newh), PILImageResampling.BILINEAR)
|
||||
img = np.asarray(pil_image)
|
||||
else:
|
||||
img = img.permute(2, 0, 1).unsqueeze(0) # 3, 0, 1) # hw(c) -> nchw
|
||||
|
||||
@@ -23,6 +23,7 @@ import torch
|
||||
from PIL import Image
|
||||
from torch import nn
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from utils import img_tensorize
|
||||
|
||||
|
||||
@@ -59,7 +60,7 @@ class ResizeShortestEdge:
|
||||
|
||||
if img.dtype == np.uint8:
|
||||
pil_image = Image.fromarray(img)
|
||||
pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
|
||||
pil_image = pil_image.resize((neww, newh), PILImageResampling.BILINEAR)
|
||||
img = np.asarray(pil_image)
|
||||
else:
|
||||
img = img.permute(2, 0, 1).unsqueeze(0) # 3, 0, 1) # hw(c) -> nchw
|
||||
|
||||
@@ -815,23 +815,23 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
|
||||
special_tokens_mask = [
|
||||
self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
|
||||
]
|
||||
special_tokens_mask = np.array(special_tokens_mask, dtype=np.bool)
|
||||
special_tokens_mask = np.array(special_tokens_mask, dtype=bool)
|
||||
else:
|
||||
special_tokens_mask = special_tokens_mask.astype(np.bool)
|
||||
special_tokens_mask = special_tokens_mask.astype(bool)
|
||||
|
||||
probability_matrix[special_tokens_mask] = 0
|
||||
# Numpy doesn't have bernoulli, so we use a binomial with 1 trial
|
||||
masked_indices = np.random.binomial(1, probability_matrix, size=probability_matrix.shape).astype(np.bool)
|
||||
masked_indices = np.random.binomial(1, probability_matrix, size=probability_matrix.shape).astype(bool)
|
||||
labels[~masked_indices] = -100 # We only compute loss on masked tokens
|
||||
|
||||
# 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
|
||||
indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(np.bool) & masked_indices
|
||||
indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(bool) & masked_indices
|
||||
inputs[indices_replaced] = self.tokenizer.mask_token_id
|
||||
|
||||
# 10% of the time, we replace masked input tokens with random word
|
||||
# indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
|
||||
indices_random = (
|
||||
np.random.binomial(1, 0.5, size=labels.shape).astype(np.bool) & masked_indices & ~indices_replaced
|
||||
np.random.binomial(1, 0.5, size=labels.shape).astype(bool) & masked_indices & ~indices_replaced
|
||||
)
|
||||
random_words = np.random.randint(
|
||||
low=0, high=len(self.tokenizer), size=np.count_nonzero(indices_random), dtype=np.int64
|
||||
@@ -1086,12 +1086,12 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
|
||||
labels = np.copy(inputs)
|
||||
# We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
|
||||
|
||||
masked_indices = mask_labels.astype(np.bool)
|
||||
masked_indices = mask_labels.astype(bool)
|
||||
|
||||
special_tokens_mask = [
|
||||
self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
|
||||
]
|
||||
masked_indices[np.array(special_tokens_mask, dtype=np.bool)] = 0
|
||||
masked_indices[np.array(special_tokens_mask, dtype=bool)] = 0
|
||||
if self.tokenizer._pad_token is not None:
|
||||
padding_mask = labels == self.tokenizer.pad_token_id
|
||||
masked_indices[padding_mask] = 0
|
||||
@@ -1099,13 +1099,13 @@ class DataCollatorForWholeWordMask(DataCollatorForLanguageModeling):
|
||||
labels[~masked_indices] = -100 # We only compute loss on masked tokens
|
||||
|
||||
# 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
|
||||
indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(np.bool) & masked_indices
|
||||
indices_replaced = np.random.binomial(1, 0.8, size=labels.shape).astype(bool) & masked_indices
|
||||
inputs[indices_replaced] = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)
|
||||
|
||||
# 10% of the time, we replace masked input tokens with random word
|
||||
# indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
|
||||
indices_random = (
|
||||
np.random.binomial(1, 0.5, size=labels.shape).astype(np.bool) & masked_indices & ~indices_replaced
|
||||
np.random.binomial(1, 0.5, size=labels.shape).astype(bool) & masked_indices & ~indices_replaced
|
||||
)
|
||||
random_words = np.random.randint(low=0, high=len(self.tokenizer), size=labels.shape, dtype=np.int64)
|
||||
inputs[indices_random] = random_words[indices_random]
|
||||
@@ -1363,7 +1363,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
|
||||
|
||||
labels = tf.identity(inputs)
|
||||
# Creating the mask and target_mapping tensors
|
||||
masked_indices = np.full(labels.shape.as_list(), 0, dtype=np.bool)
|
||||
masked_indices = np.full(labels.shape.as_list(), 0, dtype=bool)
|
||||
labels_shape = tf.shape(labels)
|
||||
target_mapping = np.zeros((labels_shape[0], labels_shape[1], labels_shape[1]), dtype=np.float32)
|
||||
|
||||
@@ -1472,7 +1472,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
|
||||
|
||||
labels = np.copy(inputs)
|
||||
# Creating the mask and target_mapping tensors
|
||||
masked_indices = np.full(labels.shape, 0, dtype=np.bool)
|
||||
masked_indices = np.full(labels.shape, 0, dtype=bool)
|
||||
target_mapping = np.zeros((labels.shape[0], labels.shape[1], labels.shape[1]), dtype=np.float32)
|
||||
|
||||
for i in range(labels.shape[0]):
|
||||
@@ -1497,7 +1497,7 @@ class DataCollatorForPermutationLanguageModeling(DataCollatorMixin):
|
||||
|
||||
special_tokens_mask = np.array(
|
||||
[self.tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()],
|
||||
dtype=np.bool,
|
||||
dtype=bool,
|
||||
)
|
||||
masked_indices[special_tokens_mask] = 0
|
||||
if self.tokenizer._pad_token is not None:
|
||||
|
||||
@@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available
|
||||
|
||||
|
||||
@@ -216,7 +217,7 @@ def get_resize_output_image_size(
|
||||
def resize(
|
||||
image,
|
||||
size: Tuple[int, int],
|
||||
resample=PIL.Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
data_format: Optional[ChannelDimension] = None,
|
||||
return_numpy: bool = True,
|
||||
) -> np.ndarray:
|
||||
@@ -228,7 +229,7 @@ def resize(
|
||||
The image to resize.
|
||||
size (`Tuple[int, int]`):
|
||||
The size to use for resizing the image.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
The filter to use for resampling.
|
||||
data_format (`ChannelDimension`, *optional*):
|
||||
The channel dimension format of the output image. If `None`, will use the inferred format from the input.
|
||||
|
||||
@@ -17,6 +17,7 @@ import os
|
||||
from typing import TYPE_CHECKING, List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
from packaging import version
|
||||
|
||||
import requests
|
||||
|
||||
@@ -34,6 +35,10 @@ if is_vision_available():
|
||||
import PIL.Image
|
||||
import PIL.ImageOps
|
||||
|
||||
if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"):
|
||||
PILImageResampling = PIL.Image.Resampling
|
||||
else:
|
||||
PILImageResampling = PIL.Image
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if is_torch_available():
|
||||
@@ -364,7 +369,7 @@ class ImageFeatureExtractionMixin:
|
||||
If `size` is an int and `default_to_square` is `True`, then image will be resized to (size, size). If
|
||||
`size` is an int and `default_to_square` is `False`, then smaller edge of the image will be matched to
|
||||
this number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
The filter to use for resampling.
|
||||
default_to_square (`bool`, *optional*, defaults to `True`):
|
||||
How to convert `size` when it is a single int. If set to `True`, the `size` will be converted to a
|
||||
@@ -380,7 +385,7 @@ class ImageFeatureExtractionMixin:
|
||||
Returns:
|
||||
image: A resized `PIL.Image.Image`.
|
||||
"""
|
||||
resample = resample if resample is not None else PIL.Image.BILINEAR
|
||||
resample = resample if resample is not None else PILImageResampling.BILINEAR
|
||||
|
||||
self._ensure_format_supported(image)
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@ from functools import partial
|
||||
from pickle import UnpicklingError
|
||||
from typing import Any, Dict, Set, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
import flax.linen as nn
|
||||
import jax
|
||||
import jax.numpy as jnp
|
||||
@@ -82,9 +80,9 @@ def dtype_byte_size(dtype):
|
||||
4
|
||||
```
|
||||
"""
|
||||
if dtype == np.bool:
|
||||
if dtype == bool:
|
||||
return 1 / 8
|
||||
bit_search = re.search("[^\d](\d+)$", dtype.name)
|
||||
bit_search = re.search(r"[^\d](\d+)$", dtype.name)
|
||||
if bit_search is None:
|
||||
raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
|
||||
bit_size = int(bit_search.groups()[0])
|
||||
|
||||
@@ -605,7 +605,7 @@ def dtype_byte_size(dtype):
|
||||
"""
|
||||
if dtype == tf.bool:
|
||||
return 1 / 8
|
||||
bit_search = re.search("[^\d](\d+)$", dtype.name)
|
||||
bit_search = re.search(r"[^\d](\d+)$", dtype.name)
|
||||
if bit_search is None:
|
||||
raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
|
||||
bit_size = int(bit_search.groups()[0])
|
||||
|
||||
@@ -32,6 +32,7 @@ from transformers import (
|
||||
BeitForMaskedImageModeling,
|
||||
BeitForSemanticSegmentation,
|
||||
)
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from transformers.utils import logging
|
||||
|
||||
|
||||
@@ -269,7 +270,9 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
|
||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
||||
image = Image.open(ds[0]["file"])
|
||||
else:
|
||||
feature_extractor = BeitFeatureExtractor(size=config.image_size, resample=Image.BILINEAR, do_center_crop=False)
|
||||
feature_extractor = BeitFeatureExtractor(
|
||||
size=config.image_size, resample=PILImageResampling.BILINEAR, do_center_crop=False
|
||||
)
|
||||
image = prepare_img()
|
||||
|
||||
encoding = feature_extractor(images=image, return_tensors="pt")
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
@@ -50,10 +52,11 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -77,7 +80,7 @@ class BeitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
self,
|
||||
do_resize=True,
|
||||
size=256,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_center_crop=True,
|
||||
crop_size=224,
|
||||
do_normalize=True,
|
||||
|
||||
@@ -548,7 +548,7 @@ class SudachiTokenizer:
|
||||
raise ValueError("Invalid sudachi_split_mode is specified.")
|
||||
|
||||
self.sudachi = dictionary.Dictionary(
|
||||
config_path=sudachi_config_path, resource_dir=sudachi_resource_dir, dict_type=sudachi_dict_type
|
||||
config_path=sudachi_config_path, resource_dir=sudachi_resource_dir, dict=sudachi_dict_type
|
||||
).create(self.split_mode)
|
||||
|
||||
def tokenize(self, text, never_split=None, **kwargs):
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, logging
|
||||
@@ -39,10 +41,11 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
Whether to resize the input to a certain `size`.
|
||||
size (`int`, *optional*, defaults to 224):
|
||||
Resize the input to the given size. Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -64,7 +67,7 @@ class CLIPFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_center_crop=True,
|
||||
crop_size=224,
|
||||
do_normalize=True,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -47,10 +49,11 @@ class ConvNextFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
|
||||
Resize the input to the given size. If 384 or larger, the image is resized to (`size`, `size`). Else, the
|
||||
smaller edge of the image will be matched to int(`size`/ `crop_pct`), after which the image is cropped to
|
||||
`size`. Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
crop_pct (`float`, *optional*):
|
||||
The percentage of the image to crop. If `None`, then a cropping percentage of 224 / 256 is used. Only has
|
||||
an effect if `do_resize` is set to `True` and `size` < 384.
|
||||
@@ -68,7 +71,7 @@ class ConvNextFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
crop_pct=None,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
|
||||
@@ -143,7 +143,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -47,10 +49,11 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -70,7 +73,7 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
self,
|
||||
do_resize=True,
|
||||
size=256,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_center_crop=True,
|
||||
crop_size=224,
|
||||
do_normalize=True,
|
||||
|
||||
@@ -25,6 +25,7 @@ from PIL import Image
|
||||
import requests
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import BeitConfig, BeitFeatureExtractor, BeitForImageClassification, BeitForMaskedImageModeling
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from transformers.utils import logging
|
||||
|
||||
|
||||
@@ -170,7 +171,9 @@ def convert_dit_checkpoint(checkpoint_url, pytorch_dump_folder_path, push_to_hub
|
||||
model.load_state_dict(state_dict)
|
||||
|
||||
# Check outputs on an image
|
||||
feature_extractor = BeitFeatureExtractor(size=config.image_size, resample=Image.BILINEAR, do_center_crop=False)
|
||||
feature_extractor = BeitFeatureExtractor(
|
||||
size=config.image_size, resample=PILImageResampling.BILINEAR, do_center_crop=False
|
||||
)
|
||||
image = prepare_img()
|
||||
|
||||
encoding = feature_extractor(images=image, return_tensors="pt")
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image, ImageOps
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
@@ -46,10 +48,11 @@ class DonutFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
size (`Tuple(int)`, *optional*, defaults to [1920, 2560]):
|
||||
Resize the shorter edge of the input to the minimum value of the given size. Should be a tuple of (width,
|
||||
height). Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_thumbnail (`bool`, *optional*, defaults to `True`):
|
||||
Whether to thumbnail the input to the given `size`.
|
||||
do_align_long_axis (`bool`, *optional*, defaults to `False`):
|
||||
@@ -71,7 +74,7 @@ class DonutFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
self,
|
||||
do_resize=True,
|
||||
size=[1920, 2560],
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_thumbnail=True,
|
||||
do_align_long_axis=False,
|
||||
do_pad=True,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
@@ -55,10 +57,11 @@ class DPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
`True`.
|
||||
keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
|
||||
Whether to keep the aspect ratio of the input. Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with mean and standard deviation.
|
||||
image_mean (`List[int]`, defaults to `[0.5, 0.5, 0.5]`):
|
||||
@@ -75,7 +78,7 @@ class DPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
size=384,
|
||||
keep_aspect_ratio=False,
|
||||
ensure_multiple_of=1,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
|
||||
@@ -22,6 +22,8 @@ from typing import Any, List, Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, logging
|
||||
@@ -129,9 +131,11 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
Whether to resize the input to a certain `size`.
|
||||
size (`int`, *optional*, defaults to 224):
|
||||
Resize the input to the given size. Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -160,9 +164,11 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
codebook_size (`int`, *optional*, defaults to 224):
|
||||
Resize the input for codebook to the given size. Only has an effect if `codebook_do_resize` is set to
|
||||
`True`.
|
||||
codebook_resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
codebook_resample (`int`, *optional*, defaults to `PIL.Image.Resampling.LANCZOS`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `codebook_do_resize` is set
|
||||
to `True`.
|
||||
codebook_do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input for codebook at the center. If the input size is smaller than
|
||||
`codebook_crop_size` along any edge, the image is padded with 0's and then center cropped.
|
||||
@@ -184,7 +190,7 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
self,
|
||||
do_resize: bool = True,
|
||||
size: Union[int, Tuple[int, int]] = 224,
|
||||
resample: int = Image.BICUBIC,
|
||||
resample: int = PILImageResampling.BICUBIC,
|
||||
do_center_crop: bool = True,
|
||||
crop_size: Union[int, Tuple[int, int]] = 224,
|
||||
do_normalize: bool = True,
|
||||
@@ -200,7 +206,7 @@ class FlavaFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
# Codebook related params
|
||||
codebook_do_resize: bool = True,
|
||||
codebook_size: bool = 112,
|
||||
codebook_resample: int = Image.LANCZOS,
|
||||
codebook_resample: int = PILImageResampling.LANCZOS,
|
||||
codebook_do_center_crop: bool = True,
|
||||
codebook_crop_size: int = 112,
|
||||
codebook_do_map_pixels: bool = True,
|
||||
|
||||
@@ -19,6 +19,7 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
import PIL.Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from transformers.utils.generic import TensorType
|
||||
|
||||
from ...image_processing_utils import BaseImageProcessor, BatchFeature
|
||||
@@ -41,7 +42,7 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
size_divisor (`int`, *optional*, defaults to 32):
|
||||
Set the class default for the `size_divisor` parameter. When `do_resize` is `True`, images are resized so
|
||||
their height and width are rounded down to the closest multiple of `size_divisor`.
|
||||
resample (`PIL.Image` resampling filter, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
resample (`PIL.Image` resampling filter, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
Set the class default for `resample`. Defines the resampling filter to use if resizing the image.
|
||||
do_rescale (`bool`, *optional*, defaults to `True`):
|
||||
Set the class default for the `do_rescale` parameter. Controls whether or not to apply the scaling factor
|
||||
@@ -54,7 +55,7 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
self,
|
||||
do_resize: bool = True,
|
||||
size_divisor: int = 32,
|
||||
resample=PIL.Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_rescale: bool = True,
|
||||
**kwargs
|
||||
) -> None:
|
||||
@@ -79,7 +80,7 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
The image is resized so its height and width are rounded down to the closest multiple of
|
||||
`size_divisor`.
|
||||
resample:
|
||||
`PIL.Image` resampling filter to use when resizing the image e.g. `PIL.Image.BILINEAR`.
|
||||
`PIL.Image` resampling filter to use when resizing the image e.g. `PIL.Image.Resampling.BILINEAR`.
|
||||
data_format (`ChannelDimension`, *optional*):
|
||||
The channel dimension format for the output image. If `None`, the channel dimension format of the input
|
||||
image is used. Can be one of:
|
||||
@@ -141,8 +142,8 @@ class GLPNImageProcessor(BaseImageProcessor):
|
||||
When `do_resize` is `True`, images are resized so their height and width are rounded down to the
|
||||
closest multiple of `size_divisor`.
|
||||
resample (`PIL.Image` resampling filter, *optional*, defaults to `self.resample`):
|
||||
`PIL.Image` resampling filter to use if resizing the image e.g. `PIL.Image.BILINEAR`. Only has an
|
||||
effect if `do_resize` is set to `True`.
|
||||
`PIL.Image` resampling filter to use if resizing the image e.g. `PIL.Image.Resampling.BILINEAR`. Only
|
||||
has an effect if `do_resize` is set to `True`.
|
||||
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
|
||||
Whether or not to apply the scaling factor (to make pixel values floats between 0. and 1.).
|
||||
return_tensors (`str`, *optional*):
|
||||
|
||||
@@ -353,7 +353,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
|
||||
def call(
|
||||
self,
|
||||
input_ids: Optional[TFModelInputType] = None,
|
||||
past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
@@ -378,11 +378,11 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
|
||||
else:
|
||||
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
||||
|
||||
if past is None:
|
||||
if past_key_values is None:
|
||||
past_length = 0
|
||||
past = [None] * len(self.h)
|
||||
past_key_values = [None] * len(self.h)
|
||||
else:
|
||||
past_length = shape_list(past[0][0])[-2]
|
||||
past_length = shape_list(past_key_values[0][0])[-2]
|
||||
|
||||
if position_ids is None:
|
||||
position_ids = tf.expand_dims(tf.range(past_length, input_shape[-1] + past_length), axis=0)
|
||||
@@ -473,7 +473,7 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
|
||||
all_attentions = () if output_attentions else None
|
||||
all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None
|
||||
all_hidden_states = () if output_hidden_states else None
|
||||
for i, (block, layer_past) in enumerate(zip(self.h, past)):
|
||||
for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
|
||||
if output_hidden_states:
|
||||
all_hidden_states = all_hidden_states + (tf.reshape(hidden_states, output_shape),)
|
||||
|
||||
@@ -650,19 +650,20 @@ GPT2_START_DOCSTRING = r"""
|
||||
GPT2_INPUTS_DOCSTRING = r"""
|
||||
Args:
|
||||
input_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, input_ids_length)`):
|
||||
`input_ids_length` = `sequence_length` if `past` is `None` else `past[0].shape[-2]` (`sequence_length` of
|
||||
input past key value states). Indices of input sequence tokens in the vocabulary.
|
||||
`input_ids_length` = `sequence_length` if `past_key_values` is `None` else `past_key_values[0].shape[-2]`
|
||||
(`sequence_length` of input past key value states). Indices of input sequence tokens in the vocabulary.
|
||||
|
||||
If `past` is used, only input IDs that do not have their past calculated should be passed as `input_ids`.
|
||||
If `past_key_values` is used, only input IDs that do not have their past calculated should be passed as
|
||||
`input_ids`.
|
||||
|
||||
Indices can be obtained using [`GPT2Tokenizer`]. See [`PreTrainedTokenizer.__call__`] and
|
||||
[`PreTrainedTokenizer.encode`] for details.
|
||||
|
||||
[What are input IDs?](../glossary#input-ids)
|
||||
past (`List[tf.Tensor]` of length `config.n_layers`):
|
||||
past_key_values (`List[tf.Tensor]` of length `config.n_layers`):
|
||||
Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model (see
|
||||
`past` output below). Can be used to speed up sequential decoding. The token ids which have their past
|
||||
given to this model should not be passed as input ids as they have already been computed.
|
||||
`past_key_values` output below). Can be used to speed up sequential decoding. The token ids which have
|
||||
their past given to this model should not be passed as input ids as they have already been computed.
|
||||
attention_mask (`tf.Tensor` or `Numpy array` of shape `(batch_size, sequence_length)`, *optional*):
|
||||
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
|
||||
|
||||
@@ -734,7 +735,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
|
||||
def call(
|
||||
self,
|
||||
input_ids: Optional[TFModelInputType] = None,
|
||||
past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
@@ -759,7 +760,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
|
||||
- 1 for tokens that are **not masked**,
|
||||
- 0 for tokens that are **masked**.
|
||||
|
||||
past (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
|
||||
past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
|
||||
contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
|
||||
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have
|
||||
their past key value states given to this model) of shape `(batch_size, 1)` instead of all
|
||||
@@ -771,7 +772,7 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
|
||||
|
||||
outputs = self.transformer(
|
||||
input_ids=input_ids,
|
||||
past=past,
|
||||
past_key_values=past_key_values,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
position_ids=position_ids,
|
||||
@@ -847,7 +848,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
"input_ids": inputs,
|
||||
"attention_mask": attention_mask,
|
||||
"position_ids": position_ids,
|
||||
"past": past,
|
||||
"past_key_values": past,
|
||||
"use_cache": use_cache,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
@@ -863,7 +864,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
def call(
|
||||
self,
|
||||
input_ids: Optional[TFModelInputType] = None,
|
||||
past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
@@ -889,7 +890,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
- 1 for tokens that are **not masked**,
|
||||
- 0 for tokens that are **masked**.
|
||||
|
||||
past (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
|
||||
past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
|
||||
contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
|
||||
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that don't have
|
||||
their past key value states given to this model) of shape `(batch_size, 1)` instead of all
|
||||
@@ -904,7 +905,7 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids=input_ids,
|
||||
past=past,
|
||||
past_key_values=past_key_values,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
position_ids=position_ids,
|
||||
@@ -982,7 +983,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
|
||||
def call(
|
||||
self,
|
||||
input_ids: Optional[TFModelInputType] = None,
|
||||
past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
@@ -1041,7 +1042,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
|
||||
flat_position_ids = tf.reshape(position_ids, (-1, seq_length)) if position_ids is not None else None
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids=flat_input_ids,
|
||||
past=past,
|
||||
past_key_values=past_key_values,
|
||||
attention_mask=flat_attention_mask,
|
||||
token_type_ids=flat_token_type_ids,
|
||||
position_ids=flat_position_ids,
|
||||
@@ -1138,7 +1139,7 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
|
||||
def call(
|
||||
self,
|
||||
input_ids: Optional[TFModelInputType] = None,
|
||||
past: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
past_key_values: Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]] = None,
|
||||
attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
|
||||
@@ -1158,7 +1159,7 @@ class TFGPT2ForSequenceClassification(TFGPT2PreTrainedModel, TFSequenceClassific
|
||||
"""
|
||||
transformer_outputs = self.transformer(
|
||||
input_ids=input_ids,
|
||||
past=past,
|
||||
past_key_values=past_key_values,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
position_ids=position_ids,
|
||||
|
||||
@@ -761,7 +761,7 @@ class TFGPTJForCausalLM(TFGPTJPreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
"input_ids": inputs,
|
||||
"attention_mask": attention_mask,
|
||||
"position_ids": position_ids,
|
||||
"past": past,
|
||||
"past_key_values": past,
|
||||
"use_cache": use_cache,
|
||||
"token_type_ids": token_type_ids,
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, logging
|
||||
@@ -60,17 +62,20 @@ class ImageGPTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input to the range between -1 and +1.
|
||||
"""
|
||||
|
||||
model_input_names = ["input_ids"]
|
||||
|
||||
def __init__(self, clusters, do_resize=True, size=32, resample=Image.BILINEAR, do_normalize=True, **kwargs):
|
||||
def __init__(
|
||||
self, clusters, do_resize=True, size=32, resample=PILImageResampling.BILINEAR, do_normalize=True, **kwargs
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
self.clusters = np.asarray(clusters)
|
||||
self.do_resize = do_resize
|
||||
|
||||
@@ -21,6 +21,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, is_pytesseract_available, logging, requires_backends
|
||||
@@ -94,10 +96,11 @@ class LayoutLMv2FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
apply_ocr (`bool`, *optional*, defaults to `True`):
|
||||
Whether to apply the Tesseract OCR engine to get words + normalized bounding boxes.
|
||||
ocr_lang (`str`, *optional*):
|
||||
@@ -119,7 +122,7 @@ class LayoutLMv2FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
apply_ocr=True,
|
||||
ocr_lang=None,
|
||||
tesseract_config="",
|
||||
|
||||
@@ -21,6 +21,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, is_pytesseract_available, logging, requires_backends
|
||||
@@ -93,10 +95,11 @@ class LayoutLMv3FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with mean and standard deviation.
|
||||
image_mean (`List[float]`, defaults to `[0.5, 0.5, 0.5]`):
|
||||
@@ -124,7 +127,7 @@ class LayoutLMv3FeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -46,10 +48,11 @@ class LevitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
size (`int` or `Tuple(int)`, *optional*, defaults to 224):
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then shorter side of input will be resized to 'size'.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to center crop the input to `size`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
@@ -66,7 +69,7 @@ class LevitFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin)
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_center_crop=True,
|
||||
do_normalize=True,
|
||||
image_mean=IMAGENET_DEFAULT_MEAN,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, ImageInput, is_torch_tensor
|
||||
from ...utils import TensorType, is_torch_available, logging
|
||||
@@ -201,10 +203,11 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
max_size (`int`, *optional*, defaults to 1333):
|
||||
The largest size an image dimension can have (otherwise it's capped). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
size_divisibility (`int`, *optional*, defaults to 32):
|
||||
Some backbones need images divisible by a certain number. If not passed, it defaults to the value used in
|
||||
Swin Transformer.
|
||||
@@ -232,7 +235,7 @@ class MaskFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
do_resize=True,
|
||||
size=800,
|
||||
max_size=1333,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
size_divisibility=32,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, ImageInput, is_torch_tensor
|
||||
from ...utils import TensorType, is_torch_available, logging
|
||||
@@ -44,10 +46,11 @@ class MobileViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to match the shorter side. Only has an effect if
|
||||
`do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -63,7 +66,7 @@ class MobileViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
self,
|
||||
do_resize=True,
|
||||
size=288,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_center_crop=True,
|
||||
crop_size=256,
|
||||
do_flip_channel_order=True,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, is_torch_tensor
|
||||
from ...utils import TensorType, is_torch_available, logging
|
||||
@@ -54,10 +56,11 @@ class OwlViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin
|
||||
The size to use for resizing the image. Only has an effect if `do_resize` is set to `True`. If `size` is a
|
||||
sequence like (h, w), output size will be matched to this. If `size` is an int, then image will be resized
|
||||
to (size, size).
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `False`):
|
||||
Whether to crop the input at the center. If the input size is smaller than `crop_size` along any edge, the
|
||||
image is padded with 0's and then center cropped.
|
||||
@@ -77,7 +80,7 @@ class OwlViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin
|
||||
self,
|
||||
do_resize=True,
|
||||
size=(768, 768),
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
crop_size=768,
|
||||
do_center_crop=False,
|
||||
do_normalize=True,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -52,10 +54,11 @@ class PerceiverFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with `image_mean` and `image_std`.
|
||||
image_mean (`List[float]`, defaults to `[0.485, 0.456, 0.406]`):
|
||||
@@ -72,7 +75,7 @@ class PerceiverFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
crop_size=256,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
|
||||
@@ -20,6 +20,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -48,10 +50,11 @@ class PoolFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
Center crop the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be center cropped to (size, size). Only has an effect if
|
||||
`do_resize_and_center_crop` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize_and_center_crop` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize_and_center_crop` is set
|
||||
to `True`.
|
||||
crop_pct (`float`, *optional*, defaults to `0.9`):
|
||||
The percentage of the image to crop from the center. Only has an effect if `do_resize_and_center_crop` is
|
||||
set to `True`.
|
||||
@@ -69,7 +72,7 @@ class PoolFormerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionM
|
||||
self,
|
||||
do_resize_and_center_crop=True,
|
||||
size=224,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
crop_pct=0.9,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Tuple, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_DEFAULT_MEAN,
|
||||
@@ -50,10 +52,11 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with mean and standard deviation.
|
||||
image_mean (`List[float]`, *optional*, defaults to `[0.485, 0.456, 0.406]`):
|
||||
@@ -73,7 +76,7 @@ class SegformerFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMi
|
||||
self,
|
||||
do_resize=True,
|
||||
size=512,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
|
||||
@@ -133,7 +133,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -134,7 +134,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -169,7 +169,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -183,7 +183,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import ImageFeatureExtractionMixin, ImageInput, is_torch_tensor
|
||||
from ...utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, TensorType, logging
|
||||
@@ -39,10 +41,11 @@ class VideoMAEFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
|
||||
Whether to resize the shorter edge of the input to a certain `size`.
|
||||
size (`int`, *optional*, defaults to 224):
|
||||
Resize the shorter edge of the input to the given size. Only has an effect if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_center_crop (`bool`, *optional*, defaults to `True`):
|
||||
Whether to center crop the input to a certain `size`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
@@ -59,7 +62,7 @@ class VideoMAEFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMix
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_center_crop=True,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import List, Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
@@ -53,10 +55,11 @@ class ViltFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
`do_resize` is set to `True`.
|
||||
size_divisor (`int`, *optional*, defaults to 32):
|
||||
The size by which to make sure both the height and width can be divided.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with mean and standard deviation.
|
||||
image_mean (`List[int]`, defaults to `[0.5, 0.5, 0.5]`):
|
||||
@@ -72,7 +75,7 @@ class ViltFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
do_resize=True,
|
||||
size=384,
|
||||
size_divisor=32,
|
||||
resample=Image.BICUBIC,
|
||||
resample=PILImageResampling.BICUBIC,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
@@ -87,7 +90,7 @@ class ViltFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
self.image_mean = image_mean if image_mean is not None else IMAGENET_STANDARD_MEAN
|
||||
self.image_std = image_std if image_std is not None else IMAGENET_STANDARD_STD
|
||||
|
||||
def _resize(self, image, shorter=800, longer=1333, size_divisor=32, resample=Image.BICUBIC):
|
||||
def _resize(self, image, shorter=800, longer=1333, size_divisor=32, resample=PILImageResampling.BICUBIC):
|
||||
"""
|
||||
Resizes the shorter edge of `image` to `shorter` and limits the longer edge to under `longer`, while preserving
|
||||
the aspect ratio. Also makes sure that both the height and width can be divided by `size_divisor`.
|
||||
@@ -104,7 +107,7 @@ class ViltFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
The size by which to limit the longer side of the image, while preserving the aspect ratio.
|
||||
size_divisor (`int`, *optional*, defaults to `32`):
|
||||
The size by which both the height and the width must be divisible.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BICUBIC`):
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BICUBIC`):
|
||||
An optional resampling filter.
|
||||
"""
|
||||
if not isinstance(image, Image.Image):
|
||||
|
||||
@@ -19,6 +19,8 @@ from typing import Optional, Union
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from transformers.image_utils import PILImageResampling
|
||||
|
||||
from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin
|
||||
from ...image_utils import (
|
||||
IMAGENET_STANDARD_MEAN,
|
||||
@@ -47,10 +49,11 @@ class ViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
|
||||
integer is provided, then the input will be resized to (size, size). Only has an effect if `do_resize` is
|
||||
set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.NEAREST`, `PIL.Image.BOX`,
|
||||
`PIL.Image.BILINEAR`, `PIL.Image.HAMMING`, `PIL.Image.BICUBIC` or `PIL.Image.LANCZOS`. Only has an effect
|
||||
if `do_resize` is set to `True`.
|
||||
resample (`int`, *optional*, defaults to `PIL.Image.Resampling.BILINEAR`):
|
||||
An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
|
||||
`PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
|
||||
`PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
|
||||
to `True`.
|
||||
do_normalize (`bool`, *optional*, defaults to `True`):
|
||||
Whether or not to normalize the input with mean and standard deviation.
|
||||
image_mean (`List[int]`, defaults to `[0.5, 0.5, 0.5]`):
|
||||
@@ -65,7 +68,7 @@ class ViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
|
||||
self,
|
||||
do_resize=True,
|
||||
size=224,
|
||||
resample=Image.BILINEAR,
|
||||
resample=PILImageResampling.BILINEAR,
|
||||
do_normalize=True,
|
||||
image_mean=None,
|
||||
image_std=None,
|
||||
|
||||
@@ -182,7 +182,7 @@ def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
|
||||
if embed_dim % 2 != 0:
|
||||
raise ValueError("embed_dim must be even")
|
||||
|
||||
omega = np.arange(embed_dim // 2, dtype=np.float)
|
||||
omega = np.arange(embed_dim // 2, dtype=float)
|
||||
omega /= embed_dim / 2.0
|
||||
omega = 1.0 / 10000**omega # (D/2,)
|
||||
|
||||
|
||||
@@ -150,7 +150,7 @@ def _compute_mask_indices(
|
||||
num_masked_spans = sequence_length // mask_length
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
|
||||
# get random indices to mask
|
||||
spec_aug_mask_idxs = np.array(
|
||||
|
||||
@@ -193,7 +193,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
@@ -266,7 +266,7 @@ def _sample_negative_indices(
|
||||
sampled_negative_indices = np.zeros(shape=(batch_size, sequence_length, num_negatives), dtype=np.int32)
|
||||
|
||||
mask_time_indices = (
|
||||
mask_time_indices.astype(np.bool) if mask_time_indices is not None else np.ones(features_shape, dtype=np.bool)
|
||||
mask_time_indices.astype(bool) if mask_time_indices is not None else np.ones(features_shape, dtype=bool)
|
||||
)
|
||||
|
||||
for batch_idx in range(batch_size):
|
||||
|
||||
@@ -190,7 +190,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
@@ -264,7 +264,7 @@ def _sample_negative_indices(
|
||||
sampled_negative_indices = np.zeros(shape=(batch_size, sequence_length, num_negatives), dtype=np.int32)
|
||||
|
||||
mask_time_indices = (
|
||||
mask_time_indices.astype(np.bool) if mask_time_indices is not None else np.ones(features_shape, dtype=np.bool)
|
||||
mask_time_indices.astype(bool) if mask_time_indices is not None else np.ones(features_shape, dtype=bool)
|
||||
)
|
||||
|
||||
for batch_idx in range(batch_size):
|
||||
|
||||
@@ -142,7 +142,7 @@ def _compute_mask_indices(
|
||||
)
|
||||
|
||||
# SpecAugment mask to fill
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool)
|
||||
spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
|
||||
spec_aug_mask_idxs = []
|
||||
|
||||
max_num_masked_span = compute_num_masked_span(sequence_length)
|
||||
|
||||
@@ -898,7 +898,7 @@ class TFXGLMForCausalLM(TFXGLMPreTrainedModel, TFCausalLanguageModelingLoss):
|
||||
return {
|
||||
"input_ids": inputs,
|
||||
"attention_mask": attention_mask,
|
||||
"past": past,
|
||||
"past_key_values": past,
|
||||
"use_cache": use_cache,
|
||||
}
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ class FlaxBeitModelIntegrationTest(unittest.TestCase):
|
||||
pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
|
||||
|
||||
# prepare bool_masked_pos
|
||||
bool_masked_pos = np.ones((1, 196), dtype=np.bool)
|
||||
bool_masked_pos = np.ones((1, 196), dtype=bool)
|
||||
|
||||
# forward pass
|
||||
outputs = model(pixel_values=pixel_values, bool_masked_pos=bool_masked_pos)
|
||||
|
||||
@@ -31,6 +31,7 @@ if is_vision_available():
|
||||
from PIL import Image
|
||||
|
||||
from transformers import FlavaFeatureExtractor
|
||||
from transformers.image_utils import PILImageResampling
|
||||
from transformers.models.flava.feature_extraction_flava import (
|
||||
FLAVA_CODEBOOK_MEAN,
|
||||
FLAVA_CODEBOOK_STD,
|
||||
@@ -80,7 +81,7 @@ class FlavaFeatureExtractionTester(unittest.TestCase):
|
||||
self.min_resolution = min_resolution
|
||||
self.max_resolution = max_resolution
|
||||
self.size = size
|
||||
self.resample = resample if resample is not None else Image.BICUBIC
|
||||
self.resample = resample if resample is not None else PILImageResampling.BICUBIC
|
||||
self.do_normalize = do_normalize
|
||||
self.image_mean = image_mean
|
||||
self.image_std = image_std
|
||||
@@ -96,7 +97,7 @@ class FlavaFeatureExtractionTester(unittest.TestCase):
|
||||
|
||||
self.codebook_do_resize = codebook_do_resize
|
||||
self.codebook_size = codebook_size
|
||||
self.codebook_resample = codebook_resample if codebook_resample is not None else Image.LANCZOS
|
||||
self.codebook_resample = codebook_resample if codebook_resample is not None else PILImageResampling.LANCZOS
|
||||
self.codebook_do_center_crop = codebook_do_center_crop
|
||||
self.codebook_crop_size = codebook_crop_size
|
||||
self.codebook_do_map_pixels = codebook_do_map_pixels
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -362,7 +362,7 @@ class FlaxWav2Vec2UtilsTest(unittest.TestCase):
|
||||
self.assertTrue(abs(ppl.item() - 141.4291) < 1e-3)
|
||||
|
||||
# mask half of the input
|
||||
mask = np.ones((2,), dtype=np.bool)
|
||||
mask = np.ones((2,), dtype=bool)
|
||||
mask[0] = 0
|
||||
|
||||
ppl = FlaxWav2Vec2GumbelVectorQuantizer._compute_perplexity(probs, mask)
|
||||
|
||||
@@ -93,7 +93,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
|
||||
question_answerer(question="In what field is HuggingFace working ?", context=None)
|
||||
|
||||
outputs = question_answerer(
|
||||
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris.", topk=20
|
||||
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris.", top_k=20
|
||||
)
|
||||
self.assertEqual(
|
||||
outputs, [{"answer": ANY(str), "start": ANY(int), "end": ANY(int), "score": ANY(float)} for i in range(20)]
|
||||
|
||||
@@ -1984,9 +1984,14 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
|
||||
@unpack_inputs
|
||||
def call(
|
||||
self, input_ids=None, past=None, output_attentions=None, output_hidden_states=None, return_dict=None
|
||||
self,
|
||||
input_ids=None,
|
||||
past_key_values=None,
|
||||
output_attentions=None,
|
||||
output_hidden_states=None,
|
||||
return_dict=None,
|
||||
):
|
||||
return input_ids, past, output_attentions, output_hidden_states, return_dict
|
||||
return input_ids, past_key_values, output_attentions, output_hidden_states, return_dict
|
||||
|
||||
@unpack_inputs
|
||||
def foo(self, pixel_values, output_attentions=None, output_hidden_states=None, return_dict=None):
|
||||
@@ -1994,55 +1999,48 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
|
||||
dummy_model = DummyModel()
|
||||
input_ids = tf.constant([0, 1, 2, 3], dtype=tf.int64)
|
||||
past = tf.constant([4, 5, 6, 7], dtype=tf.int64)
|
||||
past_key_values = tf.constant([4, 5, 6, 7], dtype=tf.int64)
|
||||
pixel_values = tf.constant([8, 9, 10, 11], dtype=tf.int64)
|
||||
|
||||
# test case 1: Pass inputs as keyword arguments; Booleans are inherited from the config.
|
||||
output = dummy_model.call(input_ids=input_ids, past=past)
|
||||
output = dummy_model.call(input_ids=input_ids, past_key_values=past_key_values)
|
||||
tf.debugging.assert_equal(output[0], input_ids)
|
||||
tf.debugging.assert_equal(output[1], past)
|
||||
tf.debugging.assert_equal(output[1], past_key_values)
|
||||
self.assertFalse(output[2])
|
||||
self.assertFalse(output[3])
|
||||
self.assertFalse(output[4])
|
||||
|
||||
# test case 2: Same as above, but with positional arguments.
|
||||
output = dummy_model.call(input_ids, past)
|
||||
output = dummy_model.call(input_ids, past_key_values)
|
||||
tf.debugging.assert_equal(output[0], input_ids)
|
||||
tf.debugging.assert_equal(output[1], past)
|
||||
tf.debugging.assert_equal(output[1], past_key_values)
|
||||
self.assertFalse(output[2])
|
||||
self.assertFalse(output[3])
|
||||
self.assertFalse(output[4])
|
||||
|
||||
# test case 3: We can also pack everything in the first input.
|
||||
output = dummy_model.call(input_ids={"input_ids": input_ids, "past": past})
|
||||
output = dummy_model.call(input_ids={"input_ids": input_ids, "past_key_values": past_key_values})
|
||||
tf.debugging.assert_equal(output[0], input_ids)
|
||||
tf.debugging.assert_equal(output[1], past)
|
||||
tf.debugging.assert_equal(output[1], past_key_values)
|
||||
self.assertFalse(output[2])
|
||||
self.assertFalse(output[3])
|
||||
self.assertFalse(output[4])
|
||||
|
||||
# test case 4: Explicit boolean arguments should override the config.
|
||||
output = dummy_model.call(input_ids=input_ids, past=past, output_attentions=False, return_dict=True)
|
||||
output = dummy_model.call(
|
||||
input_ids=input_ids, past_key_values=past_key_values, output_attentions=False, return_dict=True
|
||||
)
|
||||
tf.debugging.assert_equal(output[0], input_ids)
|
||||
tf.debugging.assert_equal(output[1], past)
|
||||
tf.debugging.assert_equal(output[1], past_key_values)
|
||||
self.assertFalse(output[2])
|
||||
self.assertFalse(output[3])
|
||||
self.assertTrue(output[4])
|
||||
|
||||
# test case 5: Unexpected arguments should raise an exception.
|
||||
with self.assertRaises(ValueError):
|
||||
output = dummy_model.call(input_ids=input_ids, past=past, foo="bar")
|
||||
output = dummy_model.call(input_ids=input_ids, past_key_values=past_key_values, foo="bar")
|
||||
|
||||
# test case 6: Despite the above, `past_key_values` should be interchangeable with `past`
|
||||
# (the decorator moves it to `past`, or vice-versa, depending on the signature).
|
||||
output = dummy_model.call(input_ids=input_ids, past_key_values=past)
|
||||
tf.debugging.assert_equal(output[0], input_ids)
|
||||
tf.debugging.assert_equal(output[1], past)
|
||||
self.assertFalse(output[2])
|
||||
self.assertFalse(output[3])
|
||||
self.assertFalse(output[4])
|
||||
|
||||
# test case 7: the decorator is independent from `main_input_name` -- it treats the first argument of the
|
||||
# test case 6: the decorator is independent from `main_input_name` -- it treats the first argument of the
|
||||
# decorated function as its main input.
|
||||
output = dummy_model.foo(pixel_values=pixel_values)
|
||||
tf.debugging.assert_equal(output[0], pixel_values)
|
||||
|
||||
Reference in New Issue
Block a user