# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OpenMMLab. All rights reserved.
"""Utils for data transform functions."""
from __future__ import annotations
import copy
import functools
import inspect
import itertools
import weakref
from typing import TYPE_CHECKING, Sequence
import cv2
import numpy as np
import torch
from shapely import geometry
from torch import BoolTensor, Tensor
if TYPE_CHECKING:
from datumaro import Polygon
CV2_INTERP_CODES = {
"nearest": cv2.INTER_NEAREST,
"bilinear": cv2.INTER_LINEAR,
"bicubic": cv2.INTER_CUBIC,
"area": cv2.INTER_AREA,
"lanczos": cv2.INTER_LANCZOS4,
}
class cache_randomness: # noqa: N801
"""Decorator that marks the method with random return value(s) in a transform class.
Reference : https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/transforms/utils.py#L15-L87
    This decorator is usually used together with the context manager
    :func:`cache_random_params`. In this context, a decorated method will
    cache its return value(s) the first time it is invoked, and always
    return the cached values when invoked again.
.. note::
Only an instance method can be decorated with ``cache_randomness``.
"""
def __init__(self, func): # noqa: ANN001
# Check `func` is to be bound as an instance method
if not inspect.isfunction(func):
msg = "Unsupport callable to decorate with@cache_randomness."
raise TypeError(msg)
func_args = inspect.getfullargspec(func).args
if len(func_args) == 0 or func_args[0] != "self":
            msg = "@cache_randomness should only be used to decorate instance methods (the first argument is ``self``)."
raise TypeError(msg)
functools.update_wrapper(self, func)
self.func = func
self.instance_ref = None
def __set_name__(self, owner, name): # noqa: ANN001
# Maintain a record of decorated methods in the class
if not hasattr(owner, "_methods_with_randomness"):
owner._methods_with_randomness = [] # noqa: SLF001
# Here `name` equals to `self.__name__`, i.e., the name of the
# decorated function, due to the invocation of `update_wrapper` in
# `self.__init__()`
owner._methods_with_randomness.append(name) # noqa: SLF001
def __call__(self, *args, **kwargs): # noqa: D102
# Get the transform instance whose method is decorated
# by cache_randomness
instance = self.instance_ref()
name = self.__name__
        # Check the flag ``self._cache_enabled``, which should be
        # set by context managers like ``cache_random_params``
cache_enabled = getattr(instance, "_cache_enabled", False)
if cache_enabled:
            # Initialize the cache of the transform instances. The flag
            # ``cache_enabled`` is set by context managers like
            # ``cache_random_params``.
if not hasattr(instance, "_cache"):
instance._cache = {} # noqa: SLF001
if name not in instance._cache: # noqa: SLF001
instance._cache[name] = self.func(instance, *args, **kwargs) # noqa: SLF001
# Return the cached value
return instance._cache[name] # noqa: SLF001
# Clear cache
if hasattr(instance, "_cache"):
del instance._cache # noqa: SLF001
# Return function output
return self.func(instance, *args, **kwargs)
def __get__(self, obj, cls): # noqa: ANN001
self.instance_ref = weakref.ref(obj)
# Return a copy to avoid multiple transform instances sharing
# one `cache_randomness` instance, which may cause data races
# in multithreading cases.
return copy.copy(self)
def get_image_shape(img: np.ndarray | Tensor | list) -> tuple[int, int]:
"""Get image(s) shape with (height, width)."""
if not isinstance(img, (np.ndarray, Tensor, list)):
msg = f"{type(img)} is not supported."
raise TypeError(msg)
if isinstance(img, np.ndarray):
return img.shape[:2]
if isinstance(img, Tensor):
return img.shape[-2:]
return get_image_shape(img[0]) # for list
def to_np_image(img: np.ndarray | Tensor | list) -> np.ndarray | list[np.ndarray]:
"""Convert torch.Tensor 3D image to numpy 3D image.
TODO (sungchul): move it into base data entity?
"""
if isinstance(img, np.ndarray):
return img
if isinstance(img, list):
return [to_np_image(im) for im in img]
return np.ascontiguousarray(img.numpy().transpose(1, 2, 0))
def rescale_bboxes(boxes: Tensor, scale_factor: tuple[float, float]) -> Tensor:
"""Rescale boxes w.r.t. rescale_factor in-place.
Note:
Both ``rescale_`` and ``resize_`` will enlarge or shrink boxes
w.r.t ``scale_facotr``. The difference is that ``resize_`` only
changes the width and the height of boxes, but ``rescale_`` also
rescales the box centers simultaneously.
Args:
boxes (Tensor): bounding boxes to be rescaled.
scale_factor (tuple[float, float]): factors for scaling boxes with (height, width).
            It is flipped to (width, height) internally before being applied. The length must be 2.
Returns:
(Tensor): rescaled bounding boxes.
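    Example:
        An illustrative sketch (the box values below are hypothetical):

        >>> boxes = torch.tensor([[10., 20., 30., 40.]])
        >>> rescale_bboxes(boxes, (2.0, 0.5)).tolist()  # (h_scale, w_scale)
        [[5.0, 40.0, 15.0, 80.0]]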
"""
assert len(scale_factor) == 2 # noqa: S101
scale_factor = boxes.new_tensor(scale_factor[::-1]).repeat(2)
return boxes * scale_factor
def rescale_masks(
masks: np.ndarray,
scale_factor: float | tuple[float, float], # (H, W)
interpolation: str = "nearest",
) -> np.ndarray:
"""Rescale masks as large as possible while keeping the aspect ratio.
Args:
masks (np.ndarray): Masks to be rescaled.
scale_factor (float | tuple[float, float]): Scale factor to be applied to masks with (height, width).
interpolation (str): Interpolation mode. Defaults to `nearest`.
Returns:
(np.ndarray) : The rescaled masks.
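    Example:
        An illustrative sketch (the mask sizes below are hypothetical):

        >>> masks = np.zeros((2, 4, 6), dtype=np.uint8)
        >>> rescale_masks(masks, 0.5).shape
        (2, 2, 3)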
"""
h, w = masks.shape[1:]
new_size = rescale_size((h, w), scale_factor) # (H, W)
# flipping `new_size` is required because cv2.resize uses (W, H)
return np.stack(
[cv2.resize(mask, new_size[::-1], interpolation=CV2_INTERP_CODES[interpolation]) for mask in masks],
)
def rescale_polygons(polygons: list[Polygon], scale_factor: float | tuple[float, float]) -> list[Polygon]:
"""Rescale polygons as large as possible while keeping the aspect ratio.
Args:
        polygons (list[Polygon]): Polygons to be rescaled.
scale_factor (float | tuple[float, float]): Scale factor to be applied to polygons with (height, width)
or single float value.
Returns:
        (list[Polygon]): The rescaled polygons.
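    Example:
        An illustrative sketch, assuming ``datumaro.Polygon`` exposes a mutable
        ``points`` list (the coordinates below are hypothetical):

        >>> from datumaro import Polygon
        >>> poly = Polygon(points=[2.0, 4.0, 6.0, 8.0, 2.0, 8.0])
        >>> rescale_polygons([poly], (0.5, 2.0))[0].points  # (h_scale, w_scale)
        [4.0, 2.0, 12.0, 4.0, 4.0, 4.0]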
"""
if isinstance(scale_factor, float):
w_scale = h_scale = scale_factor
else:
h_scale, w_scale = scale_factor
for polygon in polygons:
p = np.asarray(polygon.points, dtype=np.float32)
p[0::2] *= w_scale
p[1::2] *= h_scale
polygon.points = p.tolist()
return polygons
def rescale_keypoints(keypoints: Tensor, scale_factor: float | tuple[float, float]) -> Tensor:
"""Rescale keypoints as large as possible while keeping the aspect ratio.
Args:
keypoints (Tensor): Keypoints to be rescaled.
scale_factor (float | tuple[float, float]): Scale factor to be applied to keypoints with (height, width)
or single float value.
Returns:
(Tensor): The rescaled keypoints.
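    Example:
        An illustrative sketch (the keypoint values below are hypothetical):

        >>> keypoints = torch.tensor([[10., 20.]])  # (x, y)
        >>> rescale_keypoints(keypoints, (2.0, 0.5)).tolist()  # (h_scale, w_scale)
        [[5.0, 40.0]]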
"""
if isinstance(scale_factor, float):
w_scale = h_scale = scale_factor
else:
h_scale, w_scale = scale_factor
keypoints[:, 0] *= w_scale
keypoints[:, 1] *= h_scale
return keypoints
def translate_bboxes(boxes: Tensor, distances: Sequence[float]) -> Tensor:
"""Translate boxes in-place.
Args:
boxes (Tensor): Bounding boxes to be translated.
distances (Sequence[float]): Translate distances. The first
is horizontal distance and the second is vertical distance.
Returns:
(Tensor): Translated bounding boxes.
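    Example:
        An illustrative sketch (the box values below are hypothetical):

        >>> boxes = torch.tensor([[0., 0., 10., 10.]])
        >>> translate_bboxes(boxes, (5., -2.)).tolist()
        [[5.0, -2.0, 15.0, 8.0]]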
"""
assert len(distances) == 2 # noqa: S101
return boxes + boxes.new_tensor(distances).repeat(2)
def translate_masks(
masks: np.ndarray,
out_shape: tuple[int, int],
offset: int | float,
direction: str = "horizontal",
border_value: int | tuple[int] = 0,
interpolation: str = "bilinear",
) -> np.ndarray:
"""Translate the masks.
Args:
masks (np.ndarray): Masks to be translated.
out_shape (tuple[int]): Shape for output mask, format (h, w).
offset (int | float): The offset for translate.
direction (str): The translate direction, either "horizontal" or "vertical".
border_value (int | tuple[int]): Border value. Default 0 for masks.
interpolation (str): Interpolation method, accepted values are
'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to
'bilinear'.
Returns:
(np.ndarray): Translated BitmapMasks.
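    Example:
        An illustrative sketch (a single 3x3 mask shifted right by one pixel):

        >>> masks = np.eye(3, dtype=np.uint8)[None]
        >>> translate_masks(masks, (3, 3), 1).tolist()
        [[[0, 1, 0], [0, 0, 1], [0, 0, 0]]]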
"""
dtype = masks.dtype
if masks.shape[-2:] != out_shape:
empty_masks = np.zeros((masks.shape[0], *out_shape), dtype=dtype)
min_h = min(out_shape[0], masks.shape[1])
min_w = min(out_shape[1], masks.shape[2])
empty_masks[:, :min_h, :min_w] = masks[:, :min_h, :min_w]
masks = empty_masks
# from https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/image/geometric.py#L740-L788
height, width = masks.shape[1:]
if masks.ndim == 2:
channels = 1
elif masks.ndim == 3:
channels = masks.shape[0]
if isinstance(border_value, int):
border_value = tuple([border_value] * channels) # type: ignore[assignment]
elif isinstance(border_value, tuple):
        assert len(border_value) == channels, (  # noqa: S101
            "Expected the number of elements in `border_value` to equal the number "
            f"of channels of the input image. Found {len(border_value)} vs {channels}"
        )
else:
msg = f"Invalid type {type(border_value)} for `border_value`."
raise ValueError(msg) # noqa: TRY004
translate_matrix = _get_translate_matrix(offset, direction)
translated_masks = cv2.warpAffine(
masks.transpose((1, 2, 0)),
translate_matrix,
(width, height),
        # Note: when `border_value` has more than 3 elements (e.g., translating
        # masks with more than 3 channels), `cv2.warpAffine` raises a TypeError.
        # Here we simply use the first 3 values of `border_value`.
borderValue=border_value[:3], # type: ignore[index]
flags=CV2_INTERP_CODES[interpolation],
)
if translated_masks.ndim == 2:
translated_masks = translated_masks[:, :, None]
return translated_masks.transpose((2, 0, 1)).astype(dtype)
def translate_polygons(
polygons: list[Polygon],
out_shape: tuple[int, int],
offset: int | float,
direction: str = "horizontal",
border_value: int | float = 0,
) -> list[Polygon]:
"""Translate polygons."""
    assert (  # noqa: S101
        border_value is None or border_value == 0
    ), f"Here border_value is not used; it should be None or 0 by default, got {border_value}."
axis = 0 if direction == "horizontal" else 1
out = out_shape[1] if direction == "horizontal" else out_shape[0]
for polygon in polygons:
p = np.asarray(polygon.points)
p[axis::2] = np.clip(p[axis::2] + offset, 0, out)
polygon.points = p.tolist()
return polygons
def _get_translate_matrix(offset: int | float, direction: str = "horizontal") -> np.ndarray:
"""Generate the translate matrix.
Args:
offset (int | float): The offset used for translate.
direction (str): The translate direction, either
"horizontal" or "vertical".
Returns:
ndarray: The translate matrix with dtype float32.
"""
if direction == "horizontal":
translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
elif direction == "vertical":
translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
return translate_matrix
def clip_bboxes(boxes: Tensor, img_shape: tuple[int, int]) -> Tensor:
"""Clip boxes according to the image shape in-place.
    Args:
        boxes (Tensor): Bounding boxes to be clipped.
        img_shape (tuple[int, int]): A tuple of image height and width.
Returns:
(Tensor): Clipped boxes.
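    Example:
        An illustrative sketch (the box values below are hypothetical):

        >>> boxes = torch.tensor([[-5., 2., 120., 50.]])
        >>> clip_bboxes(boxes, (100, 100)).tolist()
        [[0.0, 2.0, 100.0, 50.0]]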
"""
h, w = img_shape
boxes[..., 0::2] = boxes[..., 0::2].clamp(0, w)
boxes[..., 1::2] = boxes[..., 1::2].clamp(0, h)
return boxes
def is_inside_bboxes(
boxes: Tensor,
img_shape: tuple[int, int],
all_inside: bool = False,
allowed_border: int = 0,
) -> BoolTensor:
"""Find boxes inside the image.
Args:
boxes (Tensor): Bounding boxes to be checked.
img_shape (tuple[int, int]): A tuple of image height and width.
        all_inside (bool): Whether to require boxes to be fully inside
            the image (True) or only partially inside (False). Defaults to False.
        allowed_border (int): Boxes that extend beyond the image
            boundary by more than ``allowed_border`` are considered
            "outside". Defaults to 0.
Returns:
(BoolTensor): A BoolTensor indicating whether the box is inside
the image. Assuming the original boxes have shape (m, n, 4),
the output has shape (m, n).
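    Example:
        An illustrative sketch (the box values below are hypothetical):

        >>> boxes = torch.tensor([[0., 0., 10., 10.], [90., 90., 110., 110.]])
        >>> is_inside_bboxes(boxes, (100, 100), all_inside=True).tolist()
        [True, False]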
"""
img_h, img_w = img_shape
if all_inside:
return (
(boxes[:, 0] >= -allowed_border)
& (boxes[:, 1] >= -allowed_border)
& (boxes[:, 2] < img_w + allowed_border)
& (boxes[:, 3] < img_h + allowed_border)
)
return (
(boxes[..., 0] < img_w + allowed_border)
& (boxes[..., 1] < img_h + allowed_border)
& (boxes[..., 2] > -allowed_border)
& (boxes[..., 3] > -allowed_border)
)
def flip_bboxes(boxes: Tensor, img_shape: tuple[int, int], direction: str = "horizontal") -> Tensor:
"""Flip boxes horizontally or vertically in-place.
Args:
boxes (Tensor): Bounding boxes to be flipped.
img_shape (Tuple[int, int]): A tuple of image height and width.
        direction (str): Flip direction, options are "horizontal",
            "vertical" and "diagonal". Defaults to "horizontal".
Returns:
(Tensor): Flipped bounding boxes.
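    Example:
        An illustrative sketch (the box values below are hypothetical):

        >>> boxes = torch.tensor([[10., 20., 30., 40.]])
        >>> flip_bboxes(boxes, (100, 200), direction="horizontal").tolist()
        [[170.0, 20.0, 190.0, 40.0]]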
"""
assert direction in ["horizontal", "vertical", "diagonal"] # noqa: S101
flipped = boxes.clone()
if direction == "horizontal":
flipped[..., 0] = img_shape[1] - boxes[..., 2]
flipped[..., 2] = img_shape[1] - boxes[..., 0]
elif direction == "vertical":
flipped[..., 1] = img_shape[0] - boxes[..., 3]
flipped[..., 3] = img_shape[0] - boxes[..., 1]
else:
flipped[..., 0] = img_shape[1] - boxes[..., 2]
flipped[..., 1] = img_shape[0] - boxes[..., 3]
flipped[..., 2] = img_shape[1] - boxes[..., 0]
flipped[..., 3] = img_shape[0] - boxes[..., 1]
return flipped
def overlap_bboxes(
bboxes1: Tensor,
bboxes2: Tensor,
mode: str = "iou",
is_aligned: bool = False,
eps: float = 1e-6,
) -> Tensor:
"""Calculate overlap between two set of bboxes.
FP16 Contributed by https://github.com/open-mmlab/mmdetection/pull/4889
Note:
        Assume bboxes1 is M x 4 and bboxes2 is N x 4. When mode is 'iou',
        several intermediate variables are generated when calculating IoU
        with the overlap_bboxes function:
1) is_aligned is False
area1: M x 1
area2: N x 1
lt: M x N x 2
rb: M x N x 2
wh: M x N x 2
overlap: M x N x 1
union: M x N x 1
ious: M x N x 1
            Total memory:
                S = (9 x N x M + N + M) * 4 bytes.
            When using FP16, we can reduce:
                R = (9 x N x M + N + M) * 4 / 2 bytes.
            R larger than (N + M) * 4 * 2 always holds when N and M >= 1:
            obviously, N + M <= N * M < 3 * N * M when N >= 2 and M >= 2,
            and N + 1 < 3 * N when N or M is 1.
            Given M = 40 (ground truths) and N = 400000 (three anchor
            boxes per grid cell, FPN, R-CNNs),
                R = 275 MB (per call).
            In a special dense-detection case with M = 512 ground truths,
                R = 3516 MB = 3.43 GB.
            When the batch size is B, the reduction becomes B x R.
            Without it, CUDA memory frequently runs out.
Experiments on GeForce RTX 2080Ti (11019 MiB):
            | dtype | M   | N      | Memory used | Real saving | Ideal saving |
            |:-----:|:---:|:------:|:-----------:|:-----------:|:------------:|
            | FP32  | 512 | 400000 | 8020 MiB    | --          | --           |
            | FP16  | 512 | 400000 | 4504 MiB    | 3516 MiB    | 3516 MiB     |
            | FP32  | 40  | 400000 | 1540 MiB    | --          | --           |
            | FP16  | 40  | 400000 | 1264 MiB    | 276 MiB     | 275 MiB     |
2) is_aligned is True
area1: N x 1
area2: N x 1
lt: N x 2
rb: N x 2
wh: N x 2
overlap: N x 1
union: N x 1
ious: N x 1
            Total memory:
                S = 11 x N * 4 bytes.
            When using FP16, we can reduce:
                R = 11 x N * 4 / 2 bytes.
            The same holds for 'giou' (which uses more memory than 'iou').
            Time-wise, FP16 is generally faster than FP32.
            When gpu_assign_thr is not -1, computation runs on the CPU,
            which takes more time but does not reduce memory.
            Therefore, we can halve the memory while keeping the speed.
If ``is_aligned`` is ``False``, then calculate the overlaps between each
bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
pair of bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
B indicates the batch dim, in shape (B1, B2, ..., Bn).
If ``is_aligned`` is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union), "iof" (intersection over
foreground) or "giou" (generalized intersection over union).
Default "iou".
is_aligned (bool, optional): If True, then m and n must be equal.
Default False.
eps (float, optional): A value added to the denominator for numerical
stability. Default 1e-6.
Returns:
Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
Example:
>>> bboxes1 = torch.FloatTensor([
>>> [0, 0, 10, 10],
>>> [10, 10, 20, 20],
>>> [32, 32, 38, 42],
>>> ])
>>> bboxes2 = torch.FloatTensor([
>>> [0, 0, 10, 20],
>>> [0, 10, 10, 19],
>>> [10, 10, 20, 20],
>>> ])
>>> overlaps = overlap_bboxes(bboxes1, bboxes2)
>>> assert overlaps.shape == (3, 3)
>>> overlaps = overlap_bboxes(bboxes1, bboxes2, is_aligned=True)
>>> assert overlaps.shape == (3, )
Example:
>>> empty = torch.empty(0, 4)
>>> nonempty = torch.FloatTensor([[0, 0, 10, 9]])
>>> assert tuple(overlap_bboxes(empty, nonempty).shape) == (0, 1)
>>> assert tuple(overlap_bboxes(nonempty, empty).shape) == (1, 0)
>>> assert tuple(overlap_bboxes(empty, empty).shape) == (0, 0)
"""
assert mode in ["iou", "iof", "giou"], f"Unsupported mode {mode}" # noqa: S101
# Either the boxes are empty or the length of boxes' last dimension is 4
assert bboxes1.size(-1) == 4 or bboxes1.size(0) == 0 # noqa: S101
assert bboxes2.size(-1) == 4 or bboxes2.size(0) == 0 # noqa: S101
# Batch dim must be the same
# Batch dim: (B1, B2, ... Bn)
assert bboxes1.shape[:-2] == bboxes2.shape[:-2] # noqa: S101
batch_shape = bboxes1.shape[:-2]
rows = bboxes1.size(-2)
cols = bboxes2.size(-2)
if is_aligned:
assert rows == cols # noqa: S101
if rows * cols == 0:
if is_aligned:
            # `Tensor.new` would treat the tuple as data; `new_empty` creates by shape
            return bboxes1.new_empty((*batch_shape, rows))
        return bboxes1.new_empty((*batch_shape, rows, cols))
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
if is_aligned:
lt = torch.max(bboxes1[..., :2], bboxes2[..., :2]) # [B, rows, 2]
rb = torch.min(bboxes1[..., 2:], bboxes2[..., 2:]) # [B, rows, 2]
wh = fp16_clamp(rb - lt, min=0)
overlap = wh[..., 0] * wh[..., 1]
union = area1 + area2 - overlap if mode in ["iou", "giou"] else area1
if mode == "giou":
enclosed_lt = torch.min(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = torch.max(bboxes1[..., 2:], bboxes2[..., 2:])
else:
lt = torch.max(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2]) # [B, rows, cols, 2]
rb = torch.min(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:]) # [B, rows, cols, 2]
wh = fp16_clamp(rb - lt, min=0)
overlap = wh[..., 0] * wh[..., 1]
union = area1[..., None] + area2[..., None, :] - overlap if mode in ["iou", "giou"] else area1[..., None]
if mode == "giou":
enclosed_lt = torch.min(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2])
enclosed_rb = torch.max(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:])
eps = union.new_tensor([eps])
union = torch.max(union, eps)
ious = overlap / union
if mode in ["iou", "iof"]:
return ious
# calculate gious
enclose_wh = fp16_clamp(enclosed_rb - enclosed_lt, min=0)
enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
enclose_area = torch.max(enclose_area, eps)
return ious - (enclose_area - union) / enclose_area
def centers_bboxes(boxes: Tensor) -> Tensor:
"""Return a tensor representing the centers of boxes."""
return (boxes[..., :2] + boxes[..., 2:]) / 2
def fp16_clamp(x: Tensor, min: float | None = None, max: float | None = None) -> Tensor: # noqa: A002
"""Clamp fp16 tensor."""
if not x.is_cuda and x.dtype == torch.float16:
        # clamp() is not implemented for float16 on CPU, so fall back to float32
return x.float().clamp(min, max).half()
return x.clamp(min, max)
def scale_size(
size: tuple[int, int],
scale: float | int | tuple[float, float] | tuple[int, int],
) -> tuple[int, int]:
"""Rescale a size by a ratio.
Args:
size (tuple[int]): (height, width).
        scale (float | int | tuple[float, float] | tuple[int, int]): Scaling factor with (height, width).
Returns:
tuple[int]: scaled size with (height, width).
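    Example:
        An illustrative sketch (note the round-half-up behavior):

        >>> scale_size((3, 5), 1.5)
        (5, 8)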
"""
if isinstance(scale, (float, int)):
scale = (scale, scale)
h, w = size
return int(h * float(scale[0]) + 0.5), int(w * float(scale[1]) + 0.5)
def rescale_size(
old_size: tuple,
scale: float | int | tuple[float, float] | tuple[int, int],
return_scale: bool = False,
) -> tuple[int, int] | tuple[tuple[int, int], float | int]:
"""Calculate the new size to be rescaled to.
Args:
old_size (tuple[int]): The old size (height, width) of image.
scale (float | int | tuple[float] | tuple[int]): The scaling factor or maximum size.
If it is a float number, an integer, or a tuple of 2 float numbers,
then the image will be rescaled by this factor, else if it is a tuple of 2 integers,
then the image will be rescaled as large as possible within the scale.
return_scale (bool): Whether to return the scaling factor besides the rescaled image size.
Returns:
tuple[int]: The new rescaled image size with (height, width).
            If return_scale is True, the scale factor will be returned as well.
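    Example:
        An illustrative sketch: fit a 1080 x 1920 image within (1024, 512)
        while keeping the aspect ratio:

        >>> rescale_size((1080, 1920), (1024, 512))
        (512, 910)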
"""
h, w = old_size
msg = ""
if isinstance(scale, (float, int)):
if scale <= 0:
msg = f"Invalid scale {scale}, must be positive."
raise ValueError(msg)
scale_factor = scale
elif isinstance(scale, tuple):
if isinstance(scale[0], int):
max_long_edge = max(scale)
max_short_edge = min(scale)
scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w))
elif isinstance(scale[0], float):
scale_factor = scale # type: ignore[assignment]
else:
msg = f"Scale must be a number or tuple of int/float, but got tuple of {type(scale[0])}"
else:
msg = f"Scale must be a number or tuple of int/float, but got {type(scale)}"
if msg:
raise TypeError(msg)
new_size = scale_size((h, w), scale_factor)
if return_scale:
return new_size, scale_factor
return new_size
def flip_image(img: np.ndarray | list[np.ndarray], direction: str = "horizontal") -> np.ndarray | list[np.ndarray]:
"""Flip an image horizontally or vertically.
Args:
img (ndarray): Image to be flipped.
direction (str): The flip direction, either "horizontal" or
"vertical" or "diagonal".
Returns:
ndarray: The flipped image.
"""
if direction not in ["horizontal", "vertical", "diagonal"]:
msg = f"direction (={direction}) should be in one of ('horizontal', 'vertical', 'diagonal')."
raise ValueError(msg)
if isinstance(img, list):
return [flip_image(im, direction) for im in img]
if direction == "horizontal":
return np.flip(img, axis=1)
elif direction == "vertical": # noqa: RET505
return np.flip(img, axis=0)
else:
return np.flip(img, axis=(0, 1))
def flip_masks(masks: np.ndarray, direction: str = "horizontal") -> np.ndarray:
"""Flip masks alone the given direction."""
assert direction in ("horizontal", "vertical", "diagonal") # noqa: S101
return np.stack([flip_image(mask, direction=direction) for mask in masks])
def flip_polygons(polygons: list[Polygon], height: int, width: int, direction: str = "horizontal") -> list[Polygon]:
"""Flip polygons alone the given direction."""
for polygon in polygons:
p = np.asarray(polygon.points)
if direction == "horizontal":
p[0::2] = width - p[0::2]
elif direction == "vertical":
p[1::2] = height - p[1::2]
else:
p[0::2] = width - p[0::2]
p[1::2] = height - p[1::2]
polygon.points = p.tolist()
return polygons
def project_bboxes(boxes: Tensor, homography_matrix: Tensor | np.ndarray) -> Tensor:
"""Geometric transformat boxes in-place.
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L184-L202
Args:
homography_matrix (Tensor or np.ndarray]):
Shape (3, 3) for geometric transformation.
Returns:
(Tensor | np.ndarray): Projected bounding boxes.
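    Example:
        An illustrative sketch (a pure-scaling homography; the values below
        are hypothetical):

        >>> boxes = torch.tensor([[10., 10., 20., 20.]])
        >>> matrix = torch.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 1.]])
        >>> project_bboxes(boxes, matrix).tolist()
        [[20.0, 20.0, 40.0, 40.0]]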
"""
if isinstance(homography_matrix, np.ndarray):
homography_matrix = boxes.new_tensor(homography_matrix)
corners = hbox2corner(boxes)
corners = torch.cat([corners, corners.new_ones(*corners.shape[:-1], 1)], dim=-1)
corners_t = torch.transpose(corners, -1, -2)
corners_t = torch.matmul(homography_matrix, corners_t)
corners = torch.transpose(corners_t, -1, -2)
    # Convert back from homogeneous coordinates by dividing by the last component
corners = corners[..., :2] / corners[..., 2:3]
return corner2hbox(corners)
def hbox2corner(boxes: Tensor) -> Tensor:
"""Convert box coordinates from (x1, y1, x2, y2) to corners ((x1, y1), (x2, y1), (x1, y2), (x2, y2)).
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L204-L217
Args:
boxes (Tensor): Horizontal box tensor with shape of (..., 4).
Returns:
Tensor: Corner tensor with shape of (..., 4, 2).
"""
x1, y1, x2, y2 = torch.split(boxes, 1, dim=-1)
corners = torch.cat([x1, y1, x2, y1, x1, y2, x2, y2], dim=-1)
return corners.reshape(*corners.shape[:-1], 4, 2)
def corner2hbox(corners: Tensor) -> Tensor:
"""Convert box coordinates from corners ((x1, y1), (x2, y1), (x1, y2), (x2, y2)) to (x1, y1, x2, y2).
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L219-L234
Args:
corners (Tensor): Corner tensor with shape of (..., 4, 2).
Returns:
Tensor: Horizontal box tensor with shape of (..., 4).
"""
if corners.numel() == 0:
return corners.new_zeros((0, 4))
min_xy = corners.min(dim=-2)[0]
max_xy = corners.max(dim=-2)[0]
return torch.cat([min_xy, max_xy], dim=-1)
def crop_masks(masks: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""Crop each mask by the given bbox."""
assert isinstance(bbox, np.ndarray) # noqa: S101
assert bbox.ndim == 1 # noqa: S101
height, width = masks.shape[1:]
# clip the boundary
bbox = bbox.copy()
bbox[0::2] = np.clip(bbox[0::2], 0, width)
bbox[1::2] = np.clip(bbox[1::2], 0, height)
x1, y1, x2, y2 = bbox
w = np.maximum(x2 - x1, 1)
h = np.maximum(y2 - y1, 1)
return masks[:, y1 : y1 + h, x1 : x1 + w]
def crop_polygons(polygons: list[Polygon], bbox: np.ndarray, height: int, width: int) -> list[Polygon]:
"""Crop each polygon by the given bbox."""
assert isinstance(bbox, np.ndarray) # noqa: S101
assert bbox.ndim == 1 # noqa: S101
# clip the boundary
bbox = bbox.copy()
bbox[0::2] = np.clip(bbox[0::2], 0, width)
bbox[1::2] = np.clip(bbox[1::2], 0, height)
x1, y1, x2, y2 = bbox
# reference: https://github.com/facebookresearch/fvcore/blob/main/fvcore/transforms/transform.py
crop_box = geometry.box(x1, y1, x2, y2).buffer(0.0)
    # suppress shapely warnings until it incorporates GEOS>=3.11.2
# reference: https://github.com/shapely/shapely/issues/1345
initial_settings = np.seterr()
np.seterr(invalid="ignore")
for polygon in polygons:
        cropped_poly_per_obj: list[list[float]] = []
p = np.asarray(polygon.points).copy()
p = geometry.Polygon(p.reshape(-1, 2)).buffer(0.0)
# polygon must be valid to perform intersection.
if not p.is_valid:
# a dummy polygon to avoid misalignment between masks and boxes
polygon.points = [0, 0, 0, 0, 0, 0]
continue
cropped = p.intersection(crop_box)
if cropped.is_empty:
# a dummy polygon to avoid misalignment between masks and boxes
polygon.points = [0, 0, 0, 0, 0, 0]
continue
cropped = cropped.geoms if isinstance(cropped, geometry.collection.BaseMultipartGeometry) else [cropped]
# one polygon may be cropped to multiple ones
for poly in cropped:
# ignore lines or points
if not isinstance(poly, geometry.Polygon) or not poly.is_valid:
continue
coords = np.asarray(poly.exterior.coords)
# remove an extra identical vertex at the end
coords = coords[:-1]
coords[:, 0] -= x1
coords[:, 1] -= y1
cropped_poly_per_obj.append(coords.reshape(-1).tolist())
# a dummy polygon to avoid misalignment between masks and boxes
if len(cropped_poly_per_obj) == 0:
cropped_poly_per_obj.append([0, 0, 0, 0, 0, 0])
polygon.points = list(itertools.chain(*cropped_poly_per_obj))
np.seterr(**initial_settings)
return polygons
def get_bboxes_from_masks(masks: Tensor) -> np.ndarray:
"""Create boxes from masks."""
num_masks = len(masks)
bboxes = np.zeros((num_masks, 4), dtype=np.float32)
x_any = masks.any(axis=1)
y_any = masks.any(axis=2)
for idx in range(num_masks):
x = np.where(x_any[idx, :])[0]
y = np.where(y_any[idx, :])[0]
if len(x) > 0 and len(y) > 0:
# use +1 for x_max and y_max so that the right and bottom
# boundary of instance masks are fully included by the box
bboxes[idx, :] = np.array([x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=np.float32)
return bboxes
def get_bboxes_from_polygons(polygons: list[Polygon], height: int, width: int) -> np.ndarray:
"""Create boxes from polygons."""
num_polygons = len(polygons)
boxes = np.zeros((num_polygons, 4), dtype=np.float32)
for idx, polygon in enumerate(polygons):
# simply use a number that is big enough for comparison with coordinates
xy_min = np.array([width * 2, height * 2], dtype=np.float32)
xy_max = np.zeros(2, dtype=np.float32)
xy = np.array(polygon.points).reshape(-1, 2).astype(np.float32)
xy_min = np.minimum(xy_min, np.min(xy, axis=0))
xy_max = np.maximum(xy_max, np.max(xy, axis=0))
boxes[idx, :2] = xy_min
boxes[idx, 2:] = xy_max
return boxes
def area_polygon(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""Compute the area of a component of a polygon.
Using the shoelace formula:
https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
Args:
x (ndarray): x coordinates of the component
y (ndarray): y coordinates of the component
    Returns:
        (float): the area of the component
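    Example:
        An illustrative check on a unit square:

        >>> float(area_polygon(np.array([0, 1, 1, 0]), np.array([0, 0, 1, 1])))
        1.0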
"""
return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))