From d9809298033f951e1a9c6e7c40a1392c9fa9cf81 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 24 May 2022 12:30:46 +0200 Subject: [PATCH] Enabling `imageGPT` auto feature extractor. (#16871) * Enabling `imageGPT` auto feature extractor. Co-authored-by: ydshieh * Small updates. * Update after rebase to use `input_ids` instead of `pixel_values`. Co-authored-by: ydshieh --- .../models/auto/feature_extraction_auto.py | 1 + src/transformers/pipelines/base.py | 13 +++++++++++-- tests/models/imagegpt/test_modeling_imagegpt.py | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/auto/feature_extraction_auto.py b/src/transformers/models/auto/feature_extraction_auto.py index 13ab60c38d..f398efe360 100644 --- a/src/transformers/models/auto/feature_extraction_auto.py +++ b/src/transformers/models/auto/feature_extraction_auto.py @@ -50,6 +50,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict( ("flava", "FlavaFeatureExtractor"), ("glpn", "GLPNFeatureExtractor"), ("hubert", "Wav2Vec2FeatureExtractor"), + ("imagegpt", "ImageGPTFeatureExtractor"), ("layoutlmv2", "LayoutLMv2FeatureExtractor"), ("layoutlmv3", "LayoutLMv3FeatureExtractor"), ("maskformer", "MaskFormerFeatureExtractor"), diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 4712eaba57..21311da8e2 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -75,14 +75,19 @@ def _pad(items, key, padding_value, padding_side): # Others include `attention_mask` etc... 
shape = items[0][key].shape dim = len(shape) - if dim == 4: + if key == "pixel_values": # This is probable image so padding shouldn't be necessary # B, C, H, W return torch.cat([item[key] for item in items], dim=0) max_length = max(item[key].shape[1] for item in items) + min_length = min(item[key].shape[1] for item in items) dtype = items[0][key].dtype if dim == 2: + if max_length == min_length: + # Bypass for `ImageGPT` which doesn't provide a padding value, yet + # we can consistently pad since the size should be matching + return torch.cat([item[key] for item in items], dim=0) tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value elif dim == 3: tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value @@ -146,7 +151,11 @@ def pad_collate_fn(tokenizer, feature_extractor): padded = {} for key in keys: if key in {"input_ids"}: - _padding_value = t_padding_value + # ImageGPT uses a feature extractor + if feature_extractor is not None: + _padding_value = f_padding_value + else: + _padding_value = t_padding_value elif key in {"input_values", "pixel_values", "input_features"}: _padding_value = f_padding_value elif key in {"p_mask", "special_tokens_mask"}: diff --git a/tests/models/imagegpt/test_modeling_imagegpt.py b/tests/models/imagegpt/test_modeling_imagegpt.py index 57b406f646..528532d4cd 100644 --- a/tests/models/imagegpt/test_modeling_imagegpt.py +++ b/tests/models/imagegpt/test_modeling_imagegpt.py @@ -171,6 +171,12 @@ class ImageGPTModelTester: reorder_and_upcast_attn=reorder_and_upcast_attn, ) + def get_pipeline_config(self): + config = self.get_config() + config.vocab_size = 513 + config.max_position_embeddings = 1024 + return config + def prepare_config_and_inputs_for_decoder(self): ( config,