63 changes: 42 additions & 21 deletions doctr/models/_utils.py
@@ -52,13 +52,16 @@ def estimate_orientation(
Returns:
the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
"""
assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
thresh = None
assert len(img.shape) == 3 and img.shape[-1] in [1, 3], (
f"Image shape {img.shape} not supported"
)
# Convert image to grayscale if necessary
if img.shape[-1] == 3:
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = cv2.medianBlur(gray_img, 5)
thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
thresh = cv2.threshold(
gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
)[1]
else:
thresh = img.astype(np.uint8)

@@ -67,33 +70,40 @@ def estimate_orientation(
# We rotate the image to the general orientation, which improves the detection
# No expand needed, the bitmap is already padded
thresh = rotate_image(thresh, -page_orientation)
else: # That's only required if we do not work on the detection models bin map
else:
# try to merge words in lines
(h, w) = img.shape[:2]
k_x = max(1, (floor(w / 100)))
k_y = max(1, (floor(h / 100)))
h, w = img.shape[:2]
k_x = max(1, floor(w / 100))
k_y = max(1, floor(h / 100))
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
thresh = cv2.dilate(thresh, kernel, iterations=1)

# extract contours
contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

# Filter & Sort contours
contours = sorted(
[contour for contour in contours if cv2.contourArea(contour) > lower_area],
key=get_max_width_length_ratio,
reverse=True,
)
# Filter & Sort contours in one pass
filtered_contours = [
contour for contour in contours if cv2.contourArea(contour) > lower_area
]
if filtered_contours:
contours_sorted = sorted(
filtered_contours, key=get_max_width_length_ratio, reverse=True
)
else:
contours_sorted = []

angles = []
for contour in contours[:n_ct]:
for contour in contours_sorted[:n_ct]:
_, (w, h), angle = cv2.minAreaRect(contour)
if w / h > ratio_threshold_for_lines: # select only contours with ratio like lines
if h == 0:
continue # avoid division by zero
ratio = w / h
if ratio > ratio_threshold_for_lines:
angles.append(angle)
elif w / h < 1 / ratio_threshold_for_lines: # if lines are vertical, subtract 90 degrees
elif ratio < 1 / ratio_threshold_for_lines:
angles.append(angle - 90)

if len(angles) == 0:
if not angles:
estimated_angle = 0  # in case no angle is found
else:
median = -median_low(angles)
@@ -107,7 +117,11 @@ def estimate_orientation(
# so in this case we prefer the general page orientation
if abs(estimated_angle) == abs(page_orientation):
return page_orientation
estimated_angle = estimated_angle if page_orientation == 0 else page_orientation + estimated_angle
estimated_angle = (
estimated_angle
if page_orientation == 0
else page_orientation + estimated_angle
)
if estimated_angle > 180:
estimated_angle -= 360
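
For reference, a minimal usage sketch of estimate_orientation (an editor's addition, not part of this diff; the image path is a placeholder):

import cv2
from doctr.models._utils import estimate_orientation

# Load a scanned page as a BGR uint8 array (placeholder path)
page = cv2.imread("scanned_page.jpg")
# Skew in degrees: negative for a left rotation, positive for a right one
angle = estimate_orientation(page)
print(f"Estimated skew: {angle} degrees")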

@@ -127,7 +141,10 @@ def rectify_crops(
# Inverse predictions (if angle of +90 is detected, rotate by -90)
orientations = [4 - pred if pred != 0 else 0 for pred in orientations]
return (
[crop if orientation == 0 else np.rot90(crop, orientation) for orientation, crop in zip(orientations, crops)]
[
crop if orientation == 0 else np.rot90(crop, orientation)
for orientation, crop in zip(orientations, crops)
]
if len(orientations) > 0
else []
)
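
Illustration only (editor's sketch): the inversion above maps a predicted quarter-turn class to the counter-rotation passed to np.rot90.

import numpy as np

crops = [np.zeros((32, 64, 3), dtype=np.uint8)]  # dummy crop, H=32, W=64
orientations = [1]  # classifier says: rotated by one quarter-turn
inv = [4 - p if p != 0 else 0 for p in orientations]  # 1 -> 3 turns back
rectified = [np.rot90(c, o) if o != 0 else c for o, c in zip(inv, crops)]
print(rectified[0].shape)  # (64, 32, 3): the crop is upright again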
@@ -184,9 +201,13 @@ def invert_data_structure(
a dictionary of lists when x is a list of dictionaries, or a list of dictionaries when x is a dictionary of lists
"""
if isinstance(x, dict):
assert len({len(v) for v in x.values()}) == 1, "All the lists in the dictionary should have the same length."
assert len({len(v) for v in x.values()}) == 1, (
"All the lists in the dictionary should have the same length."
)
return [dict(zip(x, t)) for t in zip(*x.values())]
elif isinstance(x, list):
return {k: [dic[k] for dic in x] for k in x[0]}
else:
raise TypeError(f"Expected input to be either a dict or a list, got {type(input)} instead.")
raise TypeError(
f"Expected input to be either a dict or a list, got {type(input)} instead."
)
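
For reference (not part of this diff), invert_data_structure round-trips between the two layouts:

from doctr.models._utils import invert_data_structure

batch = {"boxes": [[0, 0, 1, 1], [1, 1, 2, 2]], "scores": [0.9, 0.8]}
per_item = invert_data_structure(batch)
# [{'boxes': [0, 0, 1, 1], 'scores': 0.9}, {'boxes': [1, 1, 2, 2], 'scores': 0.8}]
assert invert_data_structure(per_item) == batch  # round-trips back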
151 changes: 112 additions & 39 deletions doctr/utils/geometry.py
@@ -74,7 +74,9 @@ def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
return list(loc_preds), list(obj_scores)


def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
def resolve_enclosing_bbox(
bboxes: list[BoundingBox] | np.ndarray,
) -> BoundingBox | np.ndarray:
"""Compute enclosing bbox either from:

Args:
@@ -96,7 +98,9 @@ def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
return (min(x), min(y)), (max(x), max(y))
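
An editor's sketch of the BoundingBox branch shown above (not part of this diff):

from doctr.utils.geometry import resolve_enclosing_bbox

boxes = [((0.1, 0.2), (0.4, 0.5)), ((0.3, 0.1), (0.6, 0.6))]
print(resolve_enclosing_bbox(boxes))  # ((0.1, 0.1), (0.6, 0.6))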


def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
def resolve_enclosing_rbbox(
rbboxes: list[np.ndarray], intermed_size: int = 1024
) -> np.ndarray:
"""Compute enclosing rotated bbox either from:

Args:
@@ -130,7 +134,11 @@ def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
"""
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype
[
[np.cos(angle_rad), -np.sin(angle_rad)],
[np.sin(angle_rad), np.cos(angle_rad)],
],
dtype=points.dtype,
)
return np.matmul(points, rotation_mat.T)
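
A quick sanity check of the rotation matrix above (editor's addition): 90 degrees should send (10, 0) to (0, 10).

import numpy as np
from doctr.utils.geometry import rotate_abs_points

pts = np.array([[10.0, 0.0]])
print(rotate_abs_points(pts, angle=90.0))  # ~[[0., 10.]] up to float rounding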

@@ -145,10 +153,12 @@ def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[int, int]:
Returns:
the height and width of the rotated image
"""
points: np.ndarray = np.array([
[img_shape[1] / 2, img_shape[0] / 2],
[-img_shape[1] / 2, img_shape[0] / 2],
])
points: np.ndarray = np.array(
[
[img_shape[1] / 2, img_shape[0] / 2],
[-img_shape[1] / 2, img_shape[0] / 2],
]
)

rotated_points = rotate_abs_points(points, angle)
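
A quick check of the expanded shape (editor's addition; the function tail is truncated in this hunk, so treat the expected value as approximate): a 100 x 200 (H x W) image rotated by 90 degrees needs a roughly 200 x 100 canvas.

from doctr.utils.geometry import compute_expanded_shape

print(compute_expanded_shape((100, 200), 90.0))  # ~(200, 100)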

@@ -176,7 +186,10 @@ def rotate_abs_geoms(
"""
# Switch to polygons
polys = (
np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
np.stack(
[geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]],
axis=1,
)
if geoms.ndim == 2
else geoms
)
@@ -191,13 +204,19 @@
# Switch back to top-left corner as referential
target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
# Clip coords to fit since there is no expansion
rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1])
rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0])
rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(
0, target_shape[1]
)
rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(
0, target_shape[0]
)

return rotated_polys


def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray:
def remap_boxes(
loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]
) -> np.ndarray:
"""Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.
This does not impact the absolute shape of the boxes, but allows computing the new relative RotatedBbox
coordinates after a resize of the image.
@@ -213,12 +232,18 @@
if len(dest_shape) != 2:
raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
if len(orig_shape) != 2:
raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")
raise ValueError(
f"Image_shape length should be 2, was found at: {len(orig_shape)}"
)
orig_height, orig_width = orig_shape
dest_height, dest_width = dest_shape
mboxes = loc_preds.copy()
mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height
mboxes[:, :, 0] = (
(loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2
) / dest_width
mboxes[:, :, 1] = (
(loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2
) / dest_height

return mboxes
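
Illustration only (editor's sketch): boxes stay fixed in absolute pixels while the canvas grows, so relative coordinates shrink toward the center.

import numpy as np
from doctr.utils.geometry import remap_boxes

# One quad covering the top-left quarter of a 100 x 100 image
box = np.array([[[0.0, 0.0], [0.5, 0.0], [0.5, 0.5], [0.0, 0.5]]])
# Padded symmetrically to 200 x 200: x = (0.0 * 100 + 50) / 200 = 0.25, etc.
print(remap_boxes(box, orig_shape=(100, 100), dest_shape=(200, 200)))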

@@ -263,19 +288,31 @@ def rotate_boxes(
# Compute rotation matrix
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype
[
[np.cos(angle_rad), -np.sin(angle_rad)],
[np.sin(angle_rad), np.cos(angle_rad)],
],
dtype=_boxes.dtype,
)
# Rotate absolute points
points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1)
points: np.ndarray = np.stack(
(_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1
)
image_center = (orig_shape[1] / 2, orig_shape[0] / 2)
rotated_points = image_center + np.matmul(points - image_center, rotation_mat)
rotated_boxes: np.ndarray = np.stack(
(rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
(
rotated_points[:, :, 0] / orig_shape[1],
rotated_points[:, :, 1] / orig_shape[0],
),
axis=-1,
)

# Apply a mask if requested
if target_shape is not None:
rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)
rotated_boxes = remap_boxes(
rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape
)

return rotated_boxes
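
The core transform above as a standalone sketch (editor's addition): relative coords to absolute pixels, rotation about the image center, back to relative, using the same right-multiply convention as the diff.

import numpy as np

angle_rad = np.deg2rad(90.0)
rot = np.array([[np.cos(angle_rad), -np.sin(angle_rad)],
                [np.sin(angle_rad), np.cos(angle_rad)]])
h, w = 100, 200
pt = np.array([0.25, 0.5]) * [w, h]     # relative (x, y) -> absolute pixels
center = np.array([w / 2, h / 2])
rotated = center + (pt - center) @ rot  # rotate about the image center
print(rotated / [w, h])                 # back to relative: [0.5, 1.0]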

@@ -297,34 +334,59 @@ def rotate_image(
Returns:
Rotated array, padded by 0 by default.
"""
# Early exit for zero angle and no padding/resize requested
if angle == 0 and not expand and not preserve_origin_shape:
return image

# Compute the expanded padding
exp_img: np.ndarray
if expand:
exp_shape = compute_expanded_shape(image.shape[:2], angle)
h_pad, w_pad = (
int(max(0, ceil(exp_shape[0] - image.shape[0]))),
int(max(0, ceil(exp_shape[1] - image.shape[1]))),
exp_h, exp_w = exp_shape
h, w = image.shape[:2]

h_pad = int(max(0, ceil(exp_h - h)))
w_pad = int(max(0, ceil(exp_w - w)))
pad_spec = (
(h_pad // 2, h_pad - h_pad // 2),
(w_pad // 2, w_pad - w_pad // 2),
(0, 0),
)
exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
exp_img = np.pad(image, pad_spec)
src_h, src_w = exp_img.shape[:2]
else:
exp_img = image
src_h, src_w = exp_img.shape[:2]

rot_mat = cv2.getRotationMatrix2D((src_w / 2, src_h / 2), angle, 1.0)
rot_img = cv2.warpAffine(exp_img, rot_mat, (src_w, src_h))

height, width = exp_img.shape[:2]
rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height))
if expand:
# Pad to get the same aspect ratio
if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
# Pad to get the same aspect ratio if needed
orig_h, orig_w = image.shape[:2]
orig_ratio = orig_h / orig_w
rot_ratio = rot_img.shape[0] / rot_img.shape[1]
if orig_ratio != rot_ratio:
# Pad width
if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
if rot_ratio > orig_ratio:
new_w = int(rot_img.shape[0] * orig_w / orig_h)
w_pad = new_w - rot_img.shape[1]
h_pad = 0
# Pad height
else:
h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
new_h = int(rot_img.shape[1] * orig_h / orig_w)
h_pad = new_h - rot_img.shape[0]
w_pad = 0
pad_spec = (
(h_pad // 2, h_pad - h_pad // 2),
(w_pad // 2, w_pad - w_pad // 2),
(0, 0),
)
rot_img = np.pad(rot_img, pad_spec)

if preserve_origin_shape:
# rescale
rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
rot_img = cv2.resize(
rot_img, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR
)

return rot_img
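
For reference (not part of this diff), the expand / preserve_origin_shape interplay:

import numpy as np
from doctr.utils.geometry import rotate_image

img = np.zeros((100, 200, 3), dtype=np.uint8)
out = rotate_image(img, angle=30.0, expand=True)
print(out.shape)  # larger canvas, padded to keep the input aspect ratio
same = rotate_image(img, angle=30.0, expand=True, preserve_origin_shape=True)
print(same.shape)  # (100, 200, 3): rescaled back to the original size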

@@ -359,13 +421,17 @@ def estimate_page_angle(polys: np.ndarray) -> float:
with np.errstate(divide="raise", invalid="raise"):
try:
return float(
np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom!
np.median(
np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi
) # Y axis from top to bottom!
)
except FloatingPointError:
return 0.0
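
A quick sanity check (editor's addition): a level line of text should give an angle of zero.

import numpy as np
from doctr.utils.geometry import estimate_page_angle

# One horizontal quad, (x, y) corners in relative coords, clockwise from top-left
poly = np.array([[[0.1, 0.4], [0.9, 0.4], [0.9, 0.6], [0.1, 0.6]]])
print(estimate_page_angle(poly))  # 0.0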


def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
def convert_to_relative_coords(
geoms: np.ndarray, img_shape: tuple[int, int]
) -> np.ndarray:
"""Convert a geometry to relative coordinates

Args:
@@ -404,7 +470,9 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]:
if boxes.shape[0] == 0:
return []
if boxes.shape[1] != 4:
raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")
raise AssertionError(
"boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)"
)

# Project relative coordinates
_boxes = boxes.copy()
@@ -420,7 +488,10 @@


def extract_rcrops(
img: np.ndarray, polys: np.ndarray, dtype=np.float32, assume_horizontal: bool = False
img: np.ndarray,
polys: np.ndarray,
dtype=np.float32,
assume_horizontal: bool = False,
) -> list[np.ndarray]:
"""Created cropped images from list of rotated bounding boxes

@@ -436,7 +507,9 @@
if polys.shape[0] == 0:
return []
if polys.shape[1:] != (4, 2):
raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")
raise AssertionError(
"polys are expected to be quadrilateral, of shape (N, 4, 2)"
)

# Project relative coordinates
_boxes = polys.copy()