Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #2286

Merged
merged 1 commit into from
Jan 22, 2025
Merged

Fix #2286

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ repos:
- id: python-use-type-annotations
- id: text-unicode-replacement-char
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.0
hooks:
- id: codespell
additional_dependencies: ["tomli"]
Expand Down
102 changes: 32 additions & 70 deletions albumentations/augmentations/geometric/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from albumentations.augmentations.utils import angle_2pi_range, handle_empty_array
from albumentations.core.bbox_utils import (
bboxes_from_masks,
bboxes_to_mask,
denormalize_bboxes,
mask_to_bboxes,
masks_from_bboxes,
normalize_bboxes,
)
Expand Down Expand Up @@ -1444,30 +1446,25 @@
) -> np.ndarray:
height, width = image_shape[:2]

# Create inverse mappings
x_inv = np.arange(width).reshape(1, -1).repeat(height, axis=0)
y_inv = np.arange(height).reshape(-1, 1).repeat(width, axis=1)
# Create mask where each keypoint has unique index
kp_mask = np.zeros((height, width), dtype=np.int16)
for idx, kp in enumerate(keypoints, start=1):
ternaus marked this conversation as resolved.
Show resolved Hide resolved
x, y = round(kp[0]), round(kp[1])
if 0 <= x < width and 0 <= y < height:
cv2.circle(kp_mask, (x, y), 1, idx, -1)

# Extract x and y coordinates
x, y = keypoints[:, 0], keypoints[:, 1]

# Clip coordinates to image boundaries
x = np.clip(x, 0, width - 1, out=x)
y = np.clip(y, 0, height - 1, out=y)

# Convert to integer indices
x_idx, y_idx = x.astype(int), y.astype(int)
# Remap the mask
transformed_kp_mask = cv2.remap(kp_mask, map_x, map_y, cv2.INTER_NEAREST)

# Apply the inverse mapping
new_x = x_inv[y_idx, x_idx] + (x - map_x[y_idx, x_idx])
new_y = y_inv[y_idx, x_idx] + (y - map_y[y_idx, x_idx])
# Extract transformed keypoints
new_points = []
for idx, kp in enumerate(keypoints, start=1):
y_coords, x_coords = np.where(transformed_kp_mask == idx)
if len(y_coords) > 0:
# Take first occurrence of the point
new_points.append(np.concatenate([[x_coords[0], y_coords[0]], kp[2:]]))

# Clip the new coordinates to ensure they're within the image bounds
new_x = np.clip(new_x, 0, width - 1, out=new_x)
new_y = np.clip(new_y, 0, height - 1, out=new_y)

# Create the transformed keypoints array
return np.column_stack([new_x, new_y, keypoints[:, 2:]])
return np.array(new_points) if new_points else np.zeros((0, keypoints.shape[1]))


@handle_empty_array("bboxes")
Expand All @@ -1477,53 +1474,18 @@
map_y: np.ndarray,
image_shape: tuple[int, int],
) -> np.ndarray:
# Number of points to sample per dimension
grid_size = 5

num_boxes = len(bboxes)
all_points = []

for box in bboxes:
x_min, y_min, x_max, y_max = box[:4]

# Create grid of points inside and on edges of box
x_points = np.linspace(x_min, x_max, grid_size)
y_points = np.linspace(y_min, y_max, grid_size)
xx, yy = np.meshgrid(x_points, y_points)

points = np.column_stack([xx.ravel(), yy.ravel()])
all_points.append(points)

# Transform all points
all_points = np.vstack(all_points)
transformed_points = remap_keypoints(
np.column_stack(
[all_points, np.zeros(len(all_points)), np.zeros(len(all_points))],
),
map_x,
map_y,
image_shape,
)[:, :2]
"""Remap bounding boxes using displacement maps."""
# Convert bboxes to mask
bbox_masks = bboxes_to_mask(bboxes, image_shape)

# Reshape back to per-box points
points_per_box = grid_size * grid_size
transformed_points = transformed_points.reshape(num_boxes, points_per_box, 2)
# Ensure maps are float32
map_x = map_x.astype(np.float32)
map_y = map_y.astype(np.float32)

# Get min/max coordinates for each box
new_bboxes = np.column_stack(
[
np.min(transformed_points[:, :, 0], axis=1), # x_min
np.min(transformed_points[:, :, 1], axis=1), # y_min
np.max(transformed_points[:, :, 0], axis=1), # x_max
np.max(transformed_points[:, :, 1], axis=1), # y_max
],
)
transformed_masks = remap(bbox_masks, map_x, map_y, cv2.INTER_NEAREST, cv2.BORDER_CONSTANT, value=0)

return (
np.column_stack([new_bboxes, bboxes[:, 4:]])
if bboxes.shape[1] > NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
else new_bboxes
)
# Convert masks back to bboxes
return mask_to_bboxes(transformed_masks, bboxes)


def generate_displacement_fields(
Expand Down Expand Up @@ -3270,7 +3232,6 @@
def get_camera_matrix_distortion_maps(
image_shape: tuple[int, int],
k: float,
center_xy: tuple[float, float],
) -> tuple[np.ndarray, np.ndarray]:
"""Generate distortion maps using camera matrix model.

Expand All @@ -3284,8 +3245,11 @@
- map_y: Vertical displacement map
"""
height, width = image_shape[:2]

center_x, center_y = width / 2, height / 2

camera_matrix = np.array(
[[width, 0, center_xy[0]], [0, height, center_xy[1]], [0, 0, 1]],
[[width, 0, center_x], [0, height, center_y], [0, 0, 1]],
dtype=np.float32,
)
distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
Expand All @@ -3302,7 +3266,6 @@
def get_fisheye_distortion_maps(
image_shape: tuple[int, int],
k: float,
center_xy: tuple[float, float],
) -> tuple[np.ndarray, np.ndarray]:
"""Generate distortion maps using fisheye model.

Expand All @@ -3317,8 +3280,7 @@
"""
height, width = image_shape[:2]

center_x, center_y = center_xy

center_x, center_y = width / 2, height / 2

Check warning on line 3283 in albumentations/augmentations/geometric/functional.py

View check run for this annotation

Codecov / codecov/patch

albumentations/augmentations/geometric/functional.py#L3283

Added line #L3283 was not covered by tests
# Create coordinate grid
y, x = np.mgrid[:height, :width].astype(np.float32)

Expand Down
5 changes: 0 additions & 5 deletions albumentations/augmentations/geometric/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,21 +1479,16 @@ def get_params_dependent_on_data(
# Get distortion coefficient
k = self.py_random.uniform(*self.distort_limit)

# Calculate center shift
center_xy = fgeometric.center(image_shape)

# Get distortion maps based on mode
if self.mode == "camera":
map_x, map_y = fgeometric.get_camera_matrix_distortion_maps(
image_shape,
k,
center_xy,
)
else: # fisheye
map_x, map_y = fgeometric.get_fisheye_distortion_maps(
image_shape,
k,
center_xy,
)

return {"map_x": map_x, "map_y": map_y}
Expand Down
75 changes: 74 additions & 1 deletion albumentations/core/bbox_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from albumentations.augmentations.utils import handle_empty_array
from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS
from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS, NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS

from .utils import DataProcessor, Params, ShapeType

Expand Down Expand Up @@ -593,3 +593,76 @@ def masks_from_bboxes(bboxes: np.ndarray, shape: ShapeType | tuple[int, int]) ->
masks[i] = (x_min <= x) & (x < x_max) & (y_min <= y) & (y < y_max)

return masks


def bboxes_to_mask(
    bboxes: np.ndarray,
    image_shape: tuple[int, int],
) -> np.ndarray:
    """Rasterize bounding boxes into a stack of per-box binary masks.

    Every box gets its own channel. Corner coordinates are rounded to the nearest
    integer, clamped to the valid pixel index range, and the resulting rectangle is
    filled with ones; both the min and the max corner pixels are included.

    Args:
        bboxes: Array of bboxes in format [x_min, y_min, x_max, y_max, ...]
        image_shape: (height, width) of the target mask

    Returns:
        uint8 mask of shape (height, width, num_boxes)
    """
    height, width = image_shape[:2]
    last_col = width - 1
    last_row = height - 1

    def _clamp(value: int, upper: int) -> int:
        # Keep the index inside [0, upper]; the lower bound is applied last.
        return max(0, min(upper, value))

    # One channel per bbox, all pixels initially off
    channels = np.zeros((height, width, len(bboxes)), dtype=np.uint8)

    for channel, bbox in enumerate(bboxes):
        left, top, right, bottom = (round(coord) for coord in bbox[:4])

        rows = slice(_clamp(top, last_row), _clamp(bottom, last_row) + 1)
        cols = slice(_clamp(left, last_col), _clamp(right, last_col) + 1)

        # Inclusive fill: the "+ 1" above keeps the max corner inside the rectangle
        channels[rows, cols, channel] = 1

    return channels


def mask_to_bboxes(
    masks: np.ndarray,
    original_bboxes: np.ndarray,
) -> np.ndarray:
    """Convert multi-channel binary mask back to bounding boxes.

    Each channel is reduced to the tight, inclusive pixel extent of its non-zero
    entries. A channel with no non-zero entries keeps the first four columns of
    the matching row of `original_bboxes`.

    Args:
        masks: Binary mask of shape (height, width, num_boxes)
        original_bboxes: Original bboxes array to preserve extra columns

    Returns:
        Array of bboxes in format [x_min, y_min, x_max, y_max, ...]. When there
        are no channels, an empty array with the column count and dtype of
        `original_bboxes` is returned.
    """
    num_boxes = masks.shape[-1]

    if num_boxes == 0:
        # Return empty array with correct shape
        return np.zeros((0, original_bboxes.shape[1]), dtype=original_bboxes.dtype)

    new_bboxes = []
    for idx in range(num_boxes):
        # Single scan per channel: the index arrays double as the emptiness check
        y_coords, x_coords = np.nonzero(masks[..., idx])
        if y_coords.size:
            new_bboxes.append(
                [x_coords.min(), y_coords.min(), x_coords.max(), y_coords.max()],
            )
        else:
            # If bbox disappeared, use original coords
            new_bboxes.append(original_bboxes[idx, :4])

    new_bboxes = np.array(new_bboxes)

    # Re-attach any extra per-box columns (everything after the 4 coordinates)
    return (
        np.column_stack([new_bboxes, original_bboxes[:, 4:]])
        if original_bboxes.shape[1] > NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
        else new_bboxes
    )
Loading
Loading