albumentations-team · ternaus · Jan 22, 2025 · Jan 22, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -68,7 +68,7 @@ repos:
       - id: python-use-type-annotations
       - id: text-unicode-replacement-char
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.3.0
+    rev: v2.4.0
     hooks:
       - id: codespell
         additional_dependencies: ["tomli"]

diff --git a/albumentations/augmentations/geometric/functional.py b/albumentations/augmentations/geometric/functional.py
@@ -20,7 +20,9 @@
 from albumentations.augmentations.utils import angle_2pi_range, handle_empty_array
 from albumentations.core.bbox_utils import (
     bboxes_from_masks,
+    bboxes_to_mask,
     denormalize_bboxes,
+    mask_to_bboxes,
     masks_from_bboxes,
     normalize_bboxes,
 )
@@ -1444,30 +1446,25 @@
 ) -> np.ndarray:
     height, width = image_shape[:2]
 
-    # Create inverse mappings
-    x_inv = np.arange(width).reshape(1, -1).repeat(height, axis=0)
-    y_inv = np.arange(height).reshape(-1, 1).repeat(width, axis=1)
+    # Create mask where each keypoint has unique index
+    kp_mask = np.zeros((height, width), dtype=np.int16)
+    for idx, kp in enumerate(keypoints, start=1):
+        x, y = round(kp[0]), round(kp[1])
+        if 0 <= x < width and 0 <= y < height:
+            cv2.circle(kp_mask, (x, y), 1, idx, -1)
 
-    # Extract x and y coordinates
-    x, y = keypoints[:, 0], keypoints[:, 1]
-
-    # Clip coordinates to image boundaries
-    x = np.clip(x, 0, width - 1, out=x)
-    y = np.clip(y, 0, height - 1, out=y)
-
-    # Convert to integer indices
-    x_idx, y_idx = x.astype(int), y.astype(int)
+    # Remap the mask
+    transformed_kp_mask = cv2.remap(kp_mask, map_x, map_y, cv2.INTER_NEAREST)
 
-    # Apply the inverse mapping
-    new_x = x_inv[y_idx, x_idx] + (x - map_x[y_idx, x_idx])
-    new_y = y_inv[y_idx, x_idx] + (y - map_y[y_idx, x_idx])
+    # Extract transformed keypoints
+    new_points = []
+    for idx, kp in enumerate(keypoints, start=1):
+        y_coords, x_coords = np.where(transformed_kp_mask == idx)
+        if len(y_coords) > 0:
+            # Take first occurrence of the point
+            new_points.append(np.concatenate([[x_coords[0], y_coords[0]], kp[2:]]))
 
-    # Clip the new coordinates to ensure they're within the image bounds
-    new_x = np.clip(new_x, 0, width - 1, out=new_x)
-    new_y = np.clip(new_y, 0, height - 1, out=new_y)
-
-    # Create the transformed keypoints array
-    return np.column_stack([new_x, new_y, keypoints[:, 2:]])
+    return np.array(new_points) if new_points else np.zeros((0, keypoints.shape[1]))
 
 
 @handle_empty_array("bboxes")
@@ -1477,53 +1474,18 @@
     map_y: np.ndarray,
     image_shape: tuple[int, int],
 ) -> np.ndarray:
-    # Number of points to sample per dimension
-    grid_size = 5
-
-    num_boxes = len(bboxes)
-    all_points = []
-
-    for box in bboxes:
-        x_min, y_min, x_max, y_max = box[:4]
-
-        # Create grid of points inside and on edges of box
-        x_points = np.linspace(x_min, x_max, grid_size)
-        y_points = np.linspace(y_min, y_max, grid_size)
-        xx, yy = np.meshgrid(x_points, y_points)
-
-        points = np.column_stack([xx.ravel(), yy.ravel()])
-        all_points.append(points)
-
-    # Transform all points
-    all_points = np.vstack(all_points)
-    transformed_points = remap_keypoints(
-        np.column_stack(
-            [all_points, np.zeros(len(all_points)), np.zeros(len(all_points))],
-        ),
-        map_x,
-        map_y,
-        image_shape,
-    )[:, :2]
+    """Remap bounding boxes using displacement maps."""
+    # Convert bboxes to mask
+    bbox_masks = bboxes_to_mask(bboxes, image_shape)
 
-    # Reshape back to per-box points
-    points_per_box = grid_size * grid_size
-    transformed_points = transformed_points.reshape(num_boxes, points_per_box, 2)
+    # Ensure maps are float32
+    map_x = map_x.astype(np.float32)
+    map_y = map_y.astype(np.float32)
 
-    # Get min/max coordinates for each box
-    new_bboxes = np.column_stack(
-        [
-            np.min(transformed_points[:, :, 0], axis=1),  # x_min
-            np.min(transformed_points[:, :, 1], axis=1),  # y_min
-            np.max(transformed_points[:, :, 0], axis=1),  # x_max
-            np.max(transformed_points[:, :, 1], axis=1),  # y_max
-        ],
-    )
+    transformed_masks = remap(bbox_masks, map_x, map_y, cv2.INTER_NEAREST, cv2.BORDER_CONSTANT, value=0)
 
-    return (
-        np.column_stack([new_bboxes, bboxes[:, 4:]])
-        if bboxes.shape[1] > NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
-        else new_bboxes
-    )
+    # Convert masks back to bboxes
+    return mask_to_bboxes(transformed_masks, bboxes)
 
 
 def generate_displacement_fields(
@@ -3270,7 +3232,6 @@
 def get_camera_matrix_distortion_maps(
     image_shape: tuple[int, int],
     k: float,
-    center_xy: tuple[float, float],
 ) -> tuple[np.ndarray, np.ndarray]:
     """Generate distortion maps using camera matrix model.
 
@@ -3284,8 +3245,11 @@
         - map_y: Vertical displacement map
     """
     height, width = image_shape[:2]
+
+    center_x, center_y = width / 2, height / 2
+
     camera_matrix = np.array(
-        [[width, 0, center_xy[0]], [0, height, center_xy[1]], [0, 0, 1]],
+        [[width, 0, center_x], [0, height, center_y], [0, 0, 1]],
         dtype=np.float32,
     )
     distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
@@ -3302,7 +3266,6 @@
 def get_fisheye_distortion_maps(
     image_shape: tuple[int, int],
     k: float,
-    center_xy: tuple[float, float],
 ) -> tuple[np.ndarray, np.ndarray]:
     """Generate distortion maps using fisheye model.
 
@@ -3317,8 +3280,7 @@
     """
     height, width = image_shape[:2]
 
-    center_x, center_y = center_xy
-
+    center_x, center_y = width / 2, height / 2
     # Create coordinate grid
     y, x = np.mgrid[:height, :width].astype(np.float32)
 

diff --git a/albumentations/augmentations/geometric/transforms.py b/albumentations/augmentations/geometric/transforms.py
@@ -1479,21 +1479,16 @@ def get_params_dependent_on_data(
         # Get distortion coefficient
         k = self.py_random.uniform(*self.distort_limit)
 
-        # Calculate center shift
-        center_xy = fgeometric.center(image_shape)
-
         # Get distortion maps based on mode
         if self.mode == "camera":
             map_x, map_y = fgeometric.get_camera_matrix_distortion_maps(
                 image_shape,
                 k,
-                center_xy,
             )
         else:  # fisheye
             map_x, map_y = fgeometric.get_fisheye_distortion_maps(
                 image_shape,
                 k,
-                center_xy,
             )
 
         return {"map_x": map_x, "map_y": map_y}

diff --git a/albumentations/core/bbox_utils.py b/albumentations/core/bbox_utils.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from albumentations.augmentations.utils import handle_empty_array
-from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS
+from albumentations.core.type_definitions import MONO_CHANNEL_DIMENSIONS, NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
 
 from .utils import DataProcessor, Params, ShapeType
 
@@ -593,3 +593,76 @@ def masks_from_bboxes(bboxes: np.ndarray, shape: ShapeType | tuple[int, int]) ->
         masks[i] = (x_min <= x) & (x < x_max) & (y_min <= y) & (y < y_max)
 
     return masks
+
+
+def bboxes_to_mask(
+    bboxes: np.ndarray,
+    image_shape: tuple[int, int],
+) -> np.ndarray:
+    """Rasterize bounding boxes into a stack of binary masks, one channel per box.
+
+    Corners are rounded, clamped into the image, and filled with inclusive bounds.
+
+    Args:
+        bboxes: Array of bboxes in format [x_min, y_min, x_max, y_max, ...]
+        image_shape: Target mask shape; only the first two entries (height, width) are read
+
+    Returns:
+        uint8 array of shape (height, width, num_boxes) with 1 inside each box
+    """
+    height, width = image_shape[:2]
+    last_col, last_row = width - 1, height - 1
+
+    # One channel per bbox, so every box is filled independently of the others
+    bbox_masks = np.zeros((height, width, len(bboxes)), dtype=np.uint8)
+
+    for channel, box in enumerate(bboxes):
+        left, top, right, bottom = (round(coord) for coord in box[:4])
+        # The min/max clamp keeps every corner on a valid pixel index
+        cols = slice(max(0, min(last_col, left)), max(0, min(last_col, right)) + 1)
+        rows = slice(max(0, min(last_row, top)), max(0, min(last_row, bottom)) + 1)
+        bbox_masks[rows, cols, channel] = 1
+
+    return bbox_masks
+
+
+def mask_to_bboxes(
+    masks: np.ndarray,
+    original_bboxes: np.ndarray,
+) -> np.ndarray:
+    """Convert multi-channel binary mask back to bounding boxes.
+
+    Each channel yields the tight box around its non-zero pixels, with x_max/y_max
+    being the last non-zero column/row. Coordinates are cast to the dtype of
+    `original_bboxes`, so empty and non-empty results share one dtype.
+
+    Args:
+        masks: Binary mask of shape (height, width, num_boxes)
+        original_bboxes: Original bboxes; supplies extra columns, fallback coords and dtype
+
+    Returns:
+        Array of bboxes in format [x_min, y_min, x_max, y_max, ...]
+    """
+    num_boxes = masks.shape[-1]
+
+    if num_boxes == 0:
+        # Return empty array with correct shape
+        return np.zeros((0, original_bboxes.shape[1]), dtype=original_bboxes.dtype)
+
+    new_bboxes = []
+    for idx in range(num_boxes):
+        # A single scan gives both the "is the channel empty" answer and the extents
+        y_coords, x_coords = np.where(masks[..., idx])
+        if x_coords.size:
+            new_bboxes.append([x_coords.min(), y_coords.min(), x_coords.max(), y_coords.max()])
+        else:
+            # If bbox disappeared, use original coords
+            new_bboxes.append(original_bboxes[idx, :4])
+
+    new_bboxes = np.array(new_bboxes, dtype=original_bboxes.dtype)
+
+    return (
+        np.column_stack([new_bboxes, original_bboxes[:, 4:]])
+        if original_bboxes.shape[1] > NUM_BBOXES_COLUMNS_IN_ALBUMENTATIONS
+        else new_bboxes
+    )