Update object detection with latest resize and pad strategies (#30955)
* Update with new resizing and pad strategy * Return pixel mask param * Update inference in guide * Fix empty compose * Update guide
This commit is contained in:
committed by
GitHub
parent
a25f7d3c12
commit
15585b81a5
@@ -50,7 +50,7 @@ python run_object_detection.py \
|
||||
--per_device_train_batch_size 8 \
|
||||
--gradient_accumulation_steps 1 \
|
||||
--remove_unused_columns false \
|
||||
--eval_do_concat_batches false \
|
||||
--eval_do_concat_batches false \
|
||||
--ignore_mismatched_sizes true \
|
||||
--metric_for_best_model eval_map \
|
||||
--greater_is_better true \
|
||||
@@ -200,6 +200,7 @@ Where `metadata.jsonl` is a file with the following structure:
|
||||
{"file_name": "0002.jpg", "objects": {"bbox": [[810.0, 100.0, 57.0, 28.0]], "categories": [1], "id": [2], "area": [40.0]}}
|
||||
...
|
||||
```
|
||||
The training script supports bounding boxes in COCO format (x_min, y_min, width, height).
|
||||
|
||||
Then, you can load the dataset with just a few lines of code:
|
||||
|
||||
|
||||
@@ -117,7 +117,10 @@ def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]
|
||||
|
||||
|
||||
def augment_and_transform_batch(
|
||||
examples: Mapping[str, Any], transform: A.Compose, image_processor: AutoImageProcessor
|
||||
examples: Mapping[str, Any],
|
||||
transform: A.Compose,
|
||||
image_processor: AutoImageProcessor,
|
||||
return_pixel_mask: bool = False,
|
||||
) -> BatchFeature:
|
||||
"""Apply augmentations and format annotations in COCO format for object detection task"""
|
||||
|
||||
@@ -139,6 +142,9 @@ def augment_and_transform_batch(
|
||||
# Apply the image processor transformations: resizing, rescaling, normalization
|
||||
result = image_processor(images=images, annotations=annotations, return_tensors="pt")
|
||||
|
||||
if not return_pixel_mask:
|
||||
result.pop("pixel_mask", None)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -415,12 +421,10 @@ def main():
|
||||
)
|
||||
image_processor = AutoImageProcessor.from_pretrained(
|
||||
model_args.image_processor_name or model_args.model_name_or_path,
|
||||
# At this moment we recommend using external transform to pad and resize images.
|
||||
# It`s faster and yields much better results for object-detection models.
|
||||
do_pad=False,
|
||||
do_resize=False,
|
||||
# We will save image size parameter in config just for reference
|
||||
size={"longest_edge": data_args.image_square_size},
|
||||
do_resize=True,
|
||||
size={"max_height": data_args.image_square_size, "max_width": data_args.image_square_size},
|
||||
do_pad=True,
|
||||
pad_size={"height": data_args.image_square_size, "width": data_args.image_square_size},
|
||||
**common_pretrained_args,
|
||||
)
|
||||
|
||||
@@ -428,10 +432,6 @@ def main():
|
||||
# Define image augmentations and dataset transforms
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
max_size = data_args.image_square_size
|
||||
basic_transforms = [
|
||||
A.LongestMaxSize(max_size=max_size),
|
||||
A.PadIfNeeded(max_size, max_size, border_mode=0, value=(128, 128, 128), position="top_left"),
|
||||
]
|
||||
train_augment_and_transform = A.Compose(
|
||||
[
|
||||
A.Compose(
|
||||
@@ -453,12 +453,11 @@ def main():
|
||||
A.HorizontalFlip(p=0.5),
|
||||
A.RandomBrightnessContrast(p=0.5),
|
||||
A.HueSaturationValue(p=0.1),
|
||||
*basic_transforms,
|
||||
],
|
||||
bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
|
||||
)
|
||||
validation_transform = A.Compose(
|
||||
basic_transforms,
|
||||
[A.NoOp()],
|
||||
bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
|
||||
)
|
||||
|
||||
|
||||
@@ -120,7 +120,10 @@ def convert_bbox_yolo_to_pascal(boxes: torch.Tensor, image_size: Tuple[int, int]
|
||||
|
||||
# Copied from examples/pytorch/object-detection/run_object_detection.augment_and_transform_batch
|
||||
def augment_and_transform_batch(
|
||||
examples: Mapping[str, Any], transform: A.Compose, image_processor: AutoImageProcessor
|
||||
examples: Mapping[str, Any],
|
||||
transform: A.Compose,
|
||||
image_processor: AutoImageProcessor,
|
||||
return_pixel_mask: bool = False,
|
||||
) -> BatchFeature:
|
||||
"""Apply augmentations and format annotations in COCO format for object detection task"""
|
||||
|
||||
@@ -142,6 +145,9 @@ def augment_and_transform_batch(
|
||||
# Apply the image processor transformations: resizing, rescaling, normalization
|
||||
result = image_processor(images=images, annotations=annotations, return_tensors="pt")
|
||||
|
||||
if not return_pixel_mask:
|
||||
result.pop("pixel_mask", None)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -473,12 +479,10 @@ def main():
|
||||
)
|
||||
image_processor = AutoImageProcessor.from_pretrained(
|
||||
args.model_name_or_path,
|
||||
# At this moment we recommend using external transform to pad and resize images.
|
||||
# It`s faster and yields much better results for object-detection models.
|
||||
do_pad=False,
|
||||
do_resize=False,
|
||||
# We will save image size parameter in config just for reference
|
||||
size={"longest_edge": args.image_square_size},
|
||||
do_resize=True,
|
||||
size={"max_height": args.image_square_size, "max_width": args.image_square_size},
|
||||
do_pad=True,
|
||||
pad_size={"height": args.image_square_size, "width": args.image_square_size},
|
||||
**common_pretrained_args,
|
||||
)
|
||||
|
||||
@@ -486,10 +490,6 @@ def main():
|
||||
# Define image augmentations and dataset transforms
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
max_size = args.image_square_size
|
||||
basic_transforms = [
|
||||
A.LongestMaxSize(max_size=max_size),
|
||||
A.PadIfNeeded(max_size, max_size, border_mode=0, value=(128, 128, 128), position="top_left"),
|
||||
]
|
||||
train_augment_and_transform = A.Compose(
|
||||
[
|
||||
A.Compose(
|
||||
@@ -511,12 +511,11 @@ def main():
|
||||
A.HorizontalFlip(p=0.5),
|
||||
A.RandomBrightnessContrast(p=0.5),
|
||||
A.HueSaturationValue(p=0.1),
|
||||
*basic_transforms,
|
||||
],
|
||||
bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
|
||||
)
|
||||
validation_transform = A.Compose(
|
||||
basic_transforms,
|
||||
[A.NoOp()],
|
||||
bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user