Update examples with image processors (#21155)
* Update examples to use image processors * Small fixes * Resolve conflicts
This commit is contained in:
@@ -34,7 +34,7 @@ import transformers
|
||||
from huggingface_hub import hf_hub_download
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoFeatureExtractor,
|
||||
AutoImageProcessor,
|
||||
AutoModelForSemanticSegmentation,
|
||||
HfArgumentParser,
|
||||
Trainer,
|
||||
@@ -240,7 +240,7 @@ class ModelArguments:
|
||||
default="main",
|
||||
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
|
||||
)
|
||||
feature_extractor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
|
||||
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
@@ -358,7 +358,7 @@ def main():
|
||||
references=labels,
|
||||
num_labels=len(id2label),
|
||||
ignore_index=0,
|
||||
reduce_labels=feature_extractor.do_reduce_labels,
|
||||
reduce_labels=image_processor.do_reduce_labels,
|
||||
)
|
||||
# add per category metrics as individual key-value pairs
|
||||
per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
|
||||
@@ -385,8 +385,8 @@ def main():
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
)
|
||||
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
||||
model_args.feature_extractor_name or model_args.model_name_or_path,
|
||||
image_processor = AutoImageProcessor.from_pretrained(
|
||||
model_args.image_processor_name or model_args.model_name_or_path,
|
||||
cache_dir=model_args.cache_dir,
|
||||
revision=model_args.model_revision,
|
||||
use_auth_token=True if model_args.use_auth_token else None,
|
||||
@@ -395,11 +395,11 @@ def main():
|
||||
# Define torchvision transforms to be applied to each image + target.
|
||||
# Not that straightforward in torchvision: https://github.com/pytorch/vision/issues/9
|
||||
# Currently based on official torchvision references: https://github.com/pytorch/vision/blob/main/references/segmentation/transforms.py
|
||||
if "shortest_edge" in feature_extractor.size:
|
||||
if "shortest_edge" in image_processor.size:
|
||||
# We instead set the target size as (shortest_edge, shortest_edge) here to ensure all images are batchable.
|
||||
size = (feature_extractor.size["shortest_edge"], feature_extractor.size["shortest_edge"])
|
||||
size = (image_processor.size["shortest_edge"], image_processor.size["shortest_edge"])
|
||||
else:
|
||||
size = (feature_extractor.size["height"], feature_extractor.size["width"])
|
||||
size = (image_processor.size["height"], image_processor.size["width"])
|
||||
train_transforms = Compose(
|
||||
[
|
||||
ReduceLabels() if data_args.reduce_labels else Identity(),
|
||||
@@ -407,7 +407,7 @@ def main():
|
||||
RandomHorizontalFlip(flip_prob=0.5),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
]
|
||||
)
|
||||
# Define torchvision transform to be applied to each image.
|
||||
@@ -418,7 +418,7 @@ def main():
|
||||
Resize(size=size),
|
||||
PILToTensor(),
|
||||
ConvertImageDtype(torch.float),
|
||||
Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
|
||||
Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -477,7 +477,7 @@ def main():
|
||||
train_dataset=dataset["train"] if training_args.do_train else None,
|
||||
eval_dataset=dataset["validation"] if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=feature_extractor,
|
||||
tokenizer=image_processor,
|
||||
data_collator=default_data_collator,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user