From 512ea57079cb856f6f643792a5ac3b669c8eede0 Mon Sep 17 00:00:00 2001 From: krishauser Date: Mon, 24 Feb 2025 11:01:11 -0500 Subject: [PATCH 01/55] Added items committed to s2024 --- GEMstack/mathutils/dubins.py | 2 +- GEMstack/mathutils/transforms.py | 21 +++++++++++++++++++++ GEMstack/onboard/interface/gem_hardware.py | 9 ++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/GEMstack/mathutils/dubins.py b/GEMstack/mathutils/dubins.py index cdb4655ab..b840201d9 100644 --- a/GEMstack/mathutils/dubins.py +++ b/GEMstack/mathutils/dubins.py @@ -44,7 +44,7 @@ def derivative(self,x,u): right = [-fwd[1],fwd[0]] phi = u[1] d = u[0] - return np.array([fwd[0]*d,fwd[1]*d,phi]) + return np.array([fwd[0]*d,fwd[1]*d,phi*d]) class DubinsCarIntegrator(ControlSpace): diff --git a/GEMstack/mathutils/transforms.py b/GEMstack/mathutils/transforms.py index 833ecc80d..a29ec48e2 100644 --- a/GEMstack/mathutils/transforms.py +++ b/GEMstack/mathutils/transforms.py @@ -36,6 +36,14 @@ def vector_dist(v1, v2) -> float: """Euclidean distance between two vectors""" return vo.distance(v1,v2) +def vector_dot(v1, v2) -> float: + """Dot product between two vectors""" + return vo.dot(v1,v2) + +def vector_cross(v1, v2) -> float: + """Cross product between two 2D vectors""" + return vo.cross(v1,v2) + def vector2_angle(v1, v2 = None) -> float: """find the ccw angle bewtween two 2d vectors""" if v2 is None: @@ -123,3 +131,16 @@ def xy_to_lat_lon(x_east : float, y_north : float, lat_reference : float, lon_re # convert GNSS waypoints into local fixed frame reprented in x and y lat, lon = axy.xy2ll(x_east, y_north, lat_reference, lon_reference) return lat, lon + +def quaternion_to_euler(x : float, y : float, z : float, w : float): + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll = np.arctan2(t0, t1) + t2 = +2.0 * (w * y - z * x) + t2 = +1.0 if t2 > +1.0 else t2 + t2 = -1.0 if t2 < -1.0 else t2 + pitch = np.arcsin(t2) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw = np.arctan2(t3, t4) + return [roll, pitch, yaw] diff --git a/GEMstack/onboard/interface/gem_hardware.py b/GEMstack/onboard/interface/gem_hardware.py index e13ff817e..4fb95d4fc 100644 --- a/GEMstack/onboard/interface/gem_hardware.py +++ b/GEMstack/onboard/interface/gem_hardware.py @@ -1,6 +1,7 @@ from .gem import * from ...utils import settings import math +import time # ROS Headers import rospy @@ -151,6 +152,7 @@ def subscribe_sensor(self, name, callback, type = None): if name == 'gnss': topic = self.ros_sensor_topics[name] if topic.endswith('inspva'): + #GEM e2 uses Novatel GNSS if type is not None and (type is not Inspva and type is not GNSSReading): raise ValueError("GEMHardwareInterface GEM e2 only supports Inspva/GNSSReading for GNSS") if type is Inspva: @@ -169,7 +171,7 @@ def callback_with_gnss_reading(inspva_msg: Inspva): callback(GNSSReading(pose,speed,inspva_msg.status)) self.gnss_sub = rospy.Subscriber(topic, Inspva, callback_with_gnss_reading) else: - #assume it's septentrio + #assume it's septentrio on GEM e4 if type is not None and (type is not INSNavGeod and type is not GNSSReading): raise ValueError("GEMHardwareInterface GEM e4 only supports INSNavGeod/GNSSReading for GNSS") if type is INSNavGeod: @@ -177,8 +179,9 @@ def callback_with_gnss_reading(inspva_msg: Inspva): else: def callback_with_gnss_reading(msg: INSNavGeod): pose = ObjectPose(ObjectFrameEnum.GLOBAL, - x=msg.longitude, - y=msg.latitude, + t=time.time(), + x=math.degrees(msg.longitude), #Septentrio GNSS uses radians rather 
than degrees + y=math.degrees(msg.latitude), z=msg.height, yaw=math.radians(msg.heading), #heading from north in degrees (TODO: maybe?? check this) roll=math.radians(msg.roll), From cfadcf4c6d568c83f275f0e36b717cb28b3a5b78 Mon Sep 17 00:00:00 2001 From: krishauser Date: Mon, 24 Feb 2025 11:04:02 -0500 Subject: [PATCH 02/55] Changed time.time to self.time --- GEMstack/onboard/interface/gem_hardware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEMstack/onboard/interface/gem_hardware.py b/GEMstack/onboard/interface/gem_hardware.py index 4fb95d4fc..836d7ef71 100644 --- a/GEMstack/onboard/interface/gem_hardware.py +++ b/GEMstack/onboard/interface/gem_hardware.py @@ -179,7 +179,7 @@ def callback_with_gnss_reading(inspva_msg: Inspva): else: def callback_with_gnss_reading(msg: INSNavGeod): pose = ObjectPose(ObjectFrameEnum.GLOBAL, - t=time.time(), + t=self.time(), x=math.degrees(msg.longitude), #Septentrio GNSS uses radians rather than degrees y=math.degrees(msg.latitude), z=msg.height, From a03c2044f2f033c0c7dad19afe2cdd278df54953 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:39:57 -0500 Subject: [PATCH 03/55] Create cone_detection.py --- GEMstack/onboard/perception/cone_detection.py | 666 ++++++++++++++++++ 1 file changed, 666 insertions(+) create mode 100644 GEMstack/onboard/perception/cone_detection.py diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py new file mode 100644 index 000000000..d41c527ae --- /dev/null +++ b/GEMstack/onboard/perception/cone_detection.py @@ -0,0 +1,666 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from ultralytics import YOLO +import cv2 +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +import sensor_msgs.point_cloud2 as pc2 +import struct, ctypes +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import math +import ros_numpy +import os + + +# ----- Helper Functions ----- + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def undistort_image(image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + undistorted = cv2.undistort(image, K, D, None, newK) + return undistorted, newK + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + + +def match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. 
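+
+    Returns the id of the nearest tracked cone whose center lies within
+    distance_threshold meters of new_center, or None if no tracked cone
+    qualifies. new_dims is accepted for interface consistency but is not used
+    in the match itself.
+
+    Minimal usage sketch (hypothetical ids and values; assumes tracked_agents
+    already holds an AgentState keyed "Cone0" near the origin):
+
+        matched_id = match_existing_cone(
+            new_center=np.array([0.2, -0.1, 0.0]),
+            new_dims=(0.3, 0.3, 0.7),
+            existing_agents=tracked_agents,
+            distance_threshold=1.0,
+        )
+        # matched_id == "Cone0" if that cone lies within 1.0 m, else None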
+ """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. + """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). 
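+
+    Points with z <= min(z) + z_range are treated as ground and dropped;
+    everything above that band is kept unchanged.
+
+    Tiny illustrative example (hypothetical points):
+
+        pts = np.array([[0., 0., -2.00],
+                        [0., 0., -1.98],
+                        [0., 0., -1.20]])
+        remove_ground_by_min_range(pts, z_range=0.05)
+        # keeps only the point at z = -1.20; the two lowest fall inside the ground band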
+ """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. + """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. + """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T + + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
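+
+    Pixel coordinates follow the standard pinhole model u = fx * X/Z + cx and
+    v = fy * Y/Z + cy, with fx, fy, cx, cy read from K; points behind the camera
+    (Z <= 0) are discarded before projection.
+
+    Worked example (hypothetical intrinsics): with fx = fy = 500, cx = 320,
+    cy = 240, a camera-frame point (1.0, 0.0, 10.0) projects to
+    u = 500 * 0.1 + 320 = 370, v = 240.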
+ """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj + + +class ConeDetector3D(Component): + """ + Detects cones by fusing YOLO 2D detections with LiDAR point cloud data. + + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + """ + + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.enable_tracking = False + self.current_agents = {} + self.tracked_agents = {} + self.cone_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.camera_front = True + self.visualize_2d = False + self.use_cyl_roi = False + self.start_time = None + self.use_start_frame = False + self.save_data = False + self.orientation = False + + def rate(self) -> float: + return 4.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + self.rgb_sub = Subscriber('/oak/rgb/image_raw', Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], + queue_size=10, slop=0.1) + self.sync.registerCallback(self.synchronized_callback) + self.detector = YOLO('/home/gem/s2025_perception_merge/GEMstack/GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + + if self.camera_front: + self.K = np.array([[684.83331299, 0., 573.37109375], + [0., 684.60968018, 363.70092773], + [0., 0., 1.]]) + else: + self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], + [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], + [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + + if self.camera_front: + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) + else: + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, + -6.19939758e-02]) + + self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.]]) + if self.camera_front: + self.T_l2c = np.array([ + [0.001090, -0.999489, -0.031941, 0.149698], + [-0.007664, 0.031932, -0.999461, -0.397813], + [0.999970, 0.001334, -0.007625, -0.691405], + [0., 0., 0., 1.000000] + ]) + else: + self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], + [-0.09720448, 0.13371206, -0.98624154, -0.1598301], + [0.68884317, -0.7061996, -0.16363744, -1.04767285], + [0., 0., 0., 1.]] + ) + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + step1 = time.time() + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr("Failed to convert image: {}".format(e)) + self.latest_image = None + step2 = time.time() + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + step3 = time.time() + print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + downsample = False + if self.latest_image is None 
or self.latest_lidar is None: + return {} + + # Ensure data/ exists and build timestamp + os.makedirs("data", exist_ok=True) + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + time_elapsed = current_time - self.start_time + + if self.save_data: + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + cv2.imwrite(f"data/{tstamp}_image.png", self.latest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + undistorted_img, current_K = undistort_image(self.latest_image, self.K, self.D) + self.current_K = current_K + self.latest_image = undistorted_img + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + results_normal = self.detector(img_normal, conf=0.3, classes=[0]) + combined_boxes = [] + if self.orientation: + img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) + img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) + results_left = self.detector(img_left, conf=0.3, classes=[0]) + results_right = self.detector(img_right, conf=0.3, classes=[0]) + boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] + boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + for box in boxes_left: + cx, cy, w, h = box + new_cx = cy + new_cy = orig_W - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + for box in boxes_right: + cx, cy, w, h = box + new_cx = orig_H - 1 - cy + new_cy = cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + for box in boxes_normal: + cx, cy, w, h = box + combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + if getattr(self, 'visualize_2d', False): + for (cx, cy, w, h, activity) in combined_boxes: + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + if activity == AgentActivityEnum.STANDING: + color = (255, 0, 0) + label = "STANDING" + elif activity == AgentActivityEnum.RIGHT: + color = (0, 255, 0) + label = "RIGHT" + elif activity == AgentActivityEnum.LEFT: + color = (0, 0, 255) + label = "LEFT" + else: + color = (255, 255, 255) + label = "UNKNOWN" + cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) + cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), + cv2.FONT_HERSHEY_SIMPLEX, 
0.6, color, 2) + cv2.imshow("Detection - Cone 2D", undistorted_img) + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + + agents = {} + + for i, box_info in enumerate(combined_boxes): + cx, cy, w, h, activity = box_info + start = time.time() + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + roi_pts = projected_pts[mask] + if roi_pts.shape[0] < 5: + continue + points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 30) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.3) + + if self.use_cyl_roi: + global_filtered = filter_points_within_threshold(lidar_down, 30) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.3, height=1.2) + refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.2) + else: + refined_cluster = remove_ground_by_min_range(points_3d, z_range=0.05) + end1 = time.time() + if refined_cluster.shape[0] < 4: + continue + + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(refined_cluster) + obb = pcd.get_oriented_bounding_box() + refined_center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + end2 = time.time() + + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] + + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + refined_center = refined_center_vehicle + + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + out_frame = ObjectFrameEnum.START + else: + xp, yp, zp = refined_center + out_frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=out_frame + ) + + # --- Optional tracking --- + if self.enable_tracking: + existing_id = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, + self.tracked_agents, + distance_threshold=2.0 + ) + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + if vehicle.v < 0.1: + alpha = 0.1 + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_agent = AgentState( + pose=updated_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + 
activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + else: + updated_agent = old_state + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + else: + agent_id = f"Cone{self.cone_counter}" + self.cone_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + + self.current_agents = agents + + # If tracking not enabled, return only current frame detections + if not self.enable_tracking: + return self.current_agents + + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + del self.tracked_agents[agent_id] + + return self.tracked_agents + + # ----- Fake Cone Detector 2D (for Testing Purposes) ----- + +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for time_range in self.times: + if t >= time_range[0] and t <= time_range[1]: + res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Cone (simulated)") + return res + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + dims = (w, h, 0) + return AgentState(pose=pose, dimensions=dims, outline=None, + type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), yaw_rate=0) + +if __name__ == '__main__': + pass From 0b84d288a422dade88ece3cc447a0fcbef059181 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:40:41 -0500 Subject: [PATCH 04/55] Create cone_detection.yaml --- launch/cone_detection.yaml | 119 +++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 launch/cone_detection.yaml diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml new file mode 100644 index 000000000..165a5b9b8 --- /dev/null +++ b/launch/cone_detection.yaml @@ -0,0 +1,119 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
+drive: + perception: + state_estimation : GNSSStateEstimator + agent_detection : cone_detection.ConeDetector3D + perception_normalization : StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. 
Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : cone_detection.FakeConeDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' From ec41cf7187d9f57939f5a1fe3ddb7c5049d7da36 Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Wed, 23 Apr 2025 10:44:36 -0500 Subject: [PATCH 05/55] Update agent.py --- GEMstack/state/agent.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GEMstack/state/agent.py b/GEMstack/state/agent.py index 757805837..2ca23ba76 100644 --- a/GEMstack/state/agent.py +++ b/GEMstack/state/agent.py @@ -11,6 +11,7 @@ class AgentEnum(Enum): LARGE_TRUCK = 2 PEDESTRIAN = 3 BICYCLIST = 4 + CONE = 5 class AgentActivityEnum(Enum): @@ -18,6 +19,9 @@ class AgentActivityEnum(Enum): MOVING = 1 # standard motion. Predictions will be used here FAST = 2 # indicates faster than usual motion, e.g., runners. 
UNDETERMINED = 3 # unknown activity + STANDING = 4 # standing cone + LEFT = 5 # flipped cone facing left + RIGHT = 6 # flipped cone facing right @dataclass @@ -41,4 +45,4 @@ def velocity_parent(self) -> Tuple[float,float,float]: def to_frame(self, frame : ObjectFrameEnum, current_pose = None, start_pose_abs = None) -> AgentState: newpose = self.pose.to_frame(frame,current_pose,start_pose_abs) newvelocity = convert_vector(self.velocity,self.pose.frame,frame,current_pose,start_pose_abs) - return replace(self,pose=newpose,velocity=newvelocity) \ No newline at end of file + return replace(self,pose=newpose,velocity=newvelocity) From 2b2afc62e72e98e24833a63a00383e5c936f895b Mon Sep 17 00:00:00 2001 From: LucasEby Date: Thu, 24 Apr 2025 23:09:18 -0500 Subject: [PATCH 06/55] Initial rosbag processing code to extract train and test image and lidar data into KITTI supported file types --- .../sensorFusionDataPrep/rosbag_processor.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100755 GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py diff --git a/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py b/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py new file mode 100755 index 000000000..9939f50d1 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py @@ -0,0 +1,100 @@ +import pypcd +import rospy +import message_filters +from sensor_msgs.msg import Image, PointCloud2 +import cv2 +# import os +from cv_bridge import CvBridge + +import numpy as np +np.float = np.float64 # temp fix for the following import +import ros_numpy + +from pathlib import Path + + +class RosbagProcessor(): + # Pairs up and stores Image and PointCloud2 messages into their respective test (20%) and training (80%) png and bin files. 
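+    #
+    # The .bin files follow the KITTI velodyne layout: a flat float32 buffer of
+    # [x, y, z, intensity] per point. Reader sketch (hypothetical path; numpy is
+    # already imported above as np):
+    #     pts = np.fromfile('data/sensorFusionData/train/bin/000000.bin',
+    #                       dtype=np.float32).reshape(-1, 4)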
+ def __init__(self, DATA_DIR: str, train_counter: int = 0, test_counter: int = 0) -> None: + + self.__TEST_DIR = DATA_DIR / 'test' + self.__TRAIN_DIR = DATA_DIR / 'train' + TEST_IMG_DIR = self.__TEST_DIR / 'img' + TEST_BIN_DIR = self.__TEST_DIR / 'bin' + TRAIN_IMG_DIR = self.__TRAIN_DIR / 'img' + TRAIN_BIN_DIR = self.__TRAIN_DIR / 'bin' + + # Create directories if they don't exist + TEST_IMG_DIR.mkdir(parents=True, exist_ok=True) + TEST_BIN_DIR.mkdir(parents=True, exist_ok=True) + TRAIN_IMG_DIR.mkdir(parents=True, exist_ok=True) + TRAIN_BIN_DIR.mkdir(parents=True, exist_ok=True) + + self.__train_counter = train_counter + self.__test_counter = test_counter + self.__bridge = CvBridge() + + # Subscribers and sychronizers + rospy.init_node('bagListener', anonymous=True) + self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image) + self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2) + self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1) + self.sync.registerCallback(self.pair_data_callback) + + rospy.spin() + + def pair_data_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): + # Divide up 20% of the data for testing: + if (self.__train_counter + self.__test_counter) % 5 == 0: + PARENT_FOLDER = self.__TEST_DIR + idx = self.__test_counter + else: + PARENT_FOLDER = self.__TRAIN_DIR + idx = self.__train_counter + + # Store point cloud and image data with KITTI formatted filenames: + image_filename = PARENT_FOLDER / 'img' / f"{idx:06d}.png" + pc_filename = PARENT_FOLDER / 'bin' / f"{idx:06d}.bin" + # image_filename = self.get_path(image_filename) + # pc_filename = self.get_path(pc_filename) + + # Read and write the received image into a KITTI specified png file + cv_image = self.__bridge.imgmsg_to_cv2(rgb_image_msg, desired_encoding='bgr8') + cv2.imwrite(image_filename, cv_image) + + # Read and write the lidar data into a KITTI specified bin file: + # Convert to numpy array and flatten it + pc_array = ros_numpy.point_cloud2.pointcloud2_to_array(lidar_pc2_msg) + pc_array = pc_array.reshape(-1) + + # Create (N, 4) array and fill it + points = np.zeros((pc_array.shape[0], 4), dtype=np.float32) + points[:, 0] = pc_array['x'] + points[:, 1] = pc_array['y'] + points[:, 2] = pc_array['z'] + + # Check for intensity just incase: + if 'intensity' in pc_array.dtype.names: + points[:, 3] = pc_array['intensity'] + else: + rospy.loginfo(f"Intensity was set to zero") + points[:, 3] = 0.0 + + # Save as binary file + with open(pc_filename, 'wb') as f: + f.write(points.tobytes()) + + if (self.__train_counter + self.__test_counter) % 5 == 0: + rospy.loginfo(f"Saved test image and point cloud #{str(self.__test_counter)}") + self.__test_counter += 1 + else: + rospy.loginfo(f"Saved train image and point cloud #{str(self.__train_counter)}") + self.__train_counter += 1 + + +if __name__ == '__main__': + BASE_DIR = Path(__file__).resolve().parent + + # Path to the destination folder (relative from this script) + DATA_DIR = BASE_DIR.parent.parent.parent.parent / 'data' / 'sensorFusionData' + bag_processor = RosbagProcessor(DATA_DIR=DATA_DIR) From 04fff4c0dab88a994eb520dc8c1577c87d929b01 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Fri, 25 Apr 2025 21:10:30 -0500 Subject: [PATCH 07/55] fixed folder formatting and creation issues --- .../sensorFusionDataPrep/rosbag_processor.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git 
a/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py b/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py index 9939f50d1..4742e2b49 100755 --- a/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py +++ b/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py @@ -16,13 +16,15 @@ class RosbagProcessor(): # Pairs up and stores Image and PointCloud2 messages into their respective test (20%) and training (80%) png and bin files. def __init__(self, DATA_DIR: str, train_counter: int = 0, test_counter: int = 0) -> None: - - self.__TEST_DIR = DATA_DIR / 'test' - self.__TRAIN_DIR = DATA_DIR / 'train' - TEST_IMG_DIR = self.__TEST_DIR / 'img' - TEST_BIN_DIR = self.__TEST_DIR / 'bin' - TRAIN_IMG_DIR = self.__TRAIN_DIR / 'img' - TRAIN_BIN_DIR = self.__TRAIN_DIR / 'bin' + self.__img_folder = 'image_2' + self.__lidar_folder = 'velodyne' + + self.__TEST_DIR = DATA_DIR / 'testing' + self.__TRAIN_DIR = DATA_DIR / 'training' + TEST_IMG_DIR = self.__TEST_DIR / self.__img_folder + TEST_BIN_DIR = self.__TEST_DIR / self.__lidar_folder + TRAIN_IMG_DIR = self.__TRAIN_DIR / self.__img_folder + TRAIN_BIN_DIR = self.__TRAIN_DIR / self.__lidar_folder # Create directories if they don't exist TEST_IMG_DIR.mkdir(parents=True, exist_ok=True) @@ -53,8 +55,8 @@ def pair_data_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): idx = self.__train_counter # Store point cloud and image data with KITTI formatted filenames: - image_filename = PARENT_FOLDER / 'img' / f"{idx:06d}.png" - pc_filename = PARENT_FOLDER / 'bin' / f"{idx:06d}.bin" + image_filename = PARENT_FOLDER / self.__img_folder / f"{idx:06d}.png" + pc_filename = PARENT_FOLDER / self.__lidar_folder / f"{idx:06d}.bin" # image_filename = self.get_path(image_filename) # pc_filename = self.get_path(pc_filename) @@ -97,4 +99,5 @@ def pair_data_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2): # Path to the destination folder (relative from this script) DATA_DIR = BASE_DIR.parent.parent.parent.parent / 'data' / 'sensorFusionData' + DATA_DIR.mkdir(parents=True, exist_ok=True) bag_processor = RosbagProcessor(DATA_DIR=DATA_DIR) From 89939c14a98ab371c94ccd6ba90a5fbcf2603e35 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 27 Apr 2025 02:20:59 -0500 Subject: [PATCH 08/55] add iou calc and main func structure for 3d bbox perf eval. 
Need to add mAP --- .../sensorFusion/eval_3d_bbox_performance.py | 463 ++++++++++++++++++ .../rosbag_processor.py | 0 2 files changed, 463 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py rename GEMstack/onboard/perception/{sensorFusionDataPrep => sensorFusion}/rosbag_processor.py (100%) mode change 100755 => 100644 diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py new file mode 100644 index 000000000..577b27940 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py @@ -0,0 +1,463 @@ +import argparse +import os +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from collections import defaultdict +import math +import warnings +import traceback + +###### Configuration ###### +IDX_X, IDX_Y, IDX_Z = 0, 1, 2 +IDX_L, IDX_W, IDX_H = 3, 4, 5 +IDX_YAW = 6 +IDX_CLASS = 7 +IDX_SCORE = 8 + +###### Helper Functions ###### + +def parse_box_line(line, is_gt=False): + """ + Parses a line from a KITTI format label file. + Handles the conversion from KITTI's y-coordinate (bottom of box) + to the geometric center y used internally. + """ + parts = line.strip().split() + try: + # Parses standard KITTI format: + # type truncated occluded alpha bbox_2d(4) dims(3) loc(3) rotation_y (score) + # 0 1 2 3 4-7 8-10 11-13 14 (15) + + # Check minimum length for standard KITTI format (15 fields without score) + if len(parts) < 15: + raise ValueError(f"Line does not have enough parts for KITTI format (expected >= 15, got {len(parts)})") + + class_label = parts[0] + + # Read dimensions (h, w, l) from KITTI standard indices 8, 9, 10 + h = float(parts[8]) + w = float(parts[9]) + l = float(parts[10]) + + # Read location (x, y_bottom, z) from KITTI standard indices 11, 12, 13 + loc_x = float(parts[11]) + loc_y_bottom = float(parts[12]) # KITTI 'y' is bottom of the box in camera coords + loc_z = float(parts[13]) + + # Read rotation_y (yaw) from KITTI standard index 14 + rot_y = float(parts[14]) + + # --- Convert to standardized internal format --- + # Internal Standardized format: [cx, cy, cz, l, w, h, yaw, class_label, score] + + # Use KITTI loc_x and loc_z directly for internal cx, cz + cx = loc_x + cz = loc_z + + # Convert KITTI y (bottom of box) to geometric center y for internal use. 
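+        # Worked example (hypothetical numbers): h = 1.8 and loc_y_bottom = 1.6
+        # give cy = 1.6 - 0.9 = 0.7.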
+ # In KITTI camera coords, Y points down, so center is 'above' bottom (numerically smaller y): loc_y - h / 2 + cy = loc_y_bottom - (h / 2.0) + + # Use KITTI rot_y directly for internal yaw + yaw = rot_y + + # Create the standardized box list [cx, cy, cz, l, w, h, yaw, class_label] + # Note the internal dimension order (l, w, h) differs from KITTI input order (h, w, l) + box = [cx, cy, cz, l, w, h, yaw, class_label] + + # Append score if it's a prediction file (standard KITTI index 15) + if not is_gt: + if len(parts) > 15: + score = float(parts[15]) + else: + # Handle missing score in prediction file if needed + score = 1.0 # Assigning default 1.0 as before + warnings.warn(f"[LOG] Prediction line missing score (field 16), assigning default 1.0: {line.strip()}") + box.append(score) # Append score using IDX_SCORE implicitly + + return box + + except ValueError as ve: + print(f"[LOG] Error parsing line: '{line.strip()}' - {ve}") + print("[LOG] Does input follows KITTI format ?") + return None + except Exception as e: + print(f"[LOG] Unexpected error parsing line: '{line.strip()}' - {e}") + traceback.print_exc() # Print detailed traceback for unexpected errors + return None + + +def load_labels(file_path, is_gt=False): + """Loads ground truth or prediction boxes from a KITTI-format file.""" + boxes = defaultdict(list) + if not os.path.exists(file_path): + # Reduced verbosity for missing prediction files (common) + # print(f"[LOG] File not found {file_path}, returning empty dict.") + return boxes + try: + with open(file_path, 'r') as f: + for i, line in enumerate(f): + box = parse_box_line(line, is_gt=is_gt) + if box: + class_label = box[IDX_CLASS] + boxes[class_label].append(box) + else: + print(f"[LOG] Skipping invalid line {i+1} in {file_path}") + except Exception as e: + print(f"[LOG] Error reading file {file_path}: {e}") + traceback.print_exc() + return boxes + +def calculate_3d_iou(box1, box2): + """ + Calculates the 3D Intersection over Union (IoU) between two bounding boxes. + + Args: + box1, box2: List or tuple representing a 3D bounding box in the + *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...] + where cy is the geometric center y. + + Returns: + float: The 3D IoU value. + + *** PLACEHOLDER IMPLEMENTATION *** + This function needs a proper implementation for ROTATED 3D boxes. + The current version calculates IoU based on axis-aligned bounding boxes + derived from the dimensions and centers, which is inaccurate for rotated boxes. 
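+
+    One possible rotated-IoU sketch (not implemented here): build each box's four
+    yaw-rotated footprint corners in the x-z plane, intersect the two footprints
+    as polygons (e.g. with shapely: Polygon(c1).intersection(Polygon(c2)).area),
+    multiply that area by the overlap of the two height intervals to get the
+    intersection volume, and divide by vol1 + vol2 - intersection.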
+ """ + ####### Simple Axis-Aligned Bounding Box (AABB) IoU ####### + def get_aabb_corners(box): + # Uses internal format where cy is geometric center + cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H] + # Ignores yaw + min_x, max_x = cx - l / 2, cx + l / 2 + min_y, max_y = cy - h / 2, cy + h / 2 # Uses geometric center cy + min_z, max_z = cz - w / 2, cz + w / 2 # Assuming cz is center z (depth) + return min_x, max_x, min_y, max_y, min_z, max_z + + min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_aabb_corners(box1) + min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_aabb_corners(box2) + + # Calculate intersection volume + inter_min_x = max(min_x1, min_x2) + inter_max_x = min(max_x1, max_x2) + inter_min_y = max(min_y1, min_y2) + inter_max_y = min(max_y1, max_y2) + inter_min_z = max(min_z1, min_z2) + inter_max_z = min(max_z1, max_z2) + + inter_l = max(0, inter_max_x - inter_min_x) + inter_h = max(0, inter_max_y - inter_min_y) + inter_w = max(0, inter_max_z - inter_min_z) + intersection_volume = inter_l * inter_h * inter_w + + # Calculate union volume + vol1 = box1[IDX_L] * box1[IDX_W] * box1[IDX_H] + vol2 = box2[IDX_L] * box2[IDX_W] * box2[IDX_H] + # Ensure volumes are positive and non-zero for stable IoU + vol1 = max(vol1, 1e-6) + vol2 = max(vol2, 1e-6) + union_volume = vol1 + vol2 - intersection_volume + union_volume = max(union_volume, 1e-6) # Avoid division by zero or very small numbers + + iou = intersection_volume / union_volume + # Clamp IoU to [0, 1] range + iou = max(0.0, min(iou, 1.0)) + return iou + +# TODO: calc mAP + +def main(): + parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') + parser.add_argument('gt_dir', type=str, help='Directory containing ground truth label files (KITTI format).') + parser.add_argument('pred_dirs', type=str, nargs='+', help='List of directories containing prediction files (KITTI format).') + parser.add_argument('output_dir', type=str, help='Directory to save evaluation results.') + parser.add_argument('--detector_names', type=str, nargs='*', help='Optional names for the detectors (corresponding to pred_dirs). If not provided, uses directory names.') + parser.add_argument('--iou_threshold', type=float, default=0.5, help='3D IoU threshold for matching (default: 0.5). KITTI examples: 0.5 (Car Easy/Mod), 0.7 (Car Hard), 0.5 (Ped/Cyc Easy/Mod), 0.5 (Ped/Cyc Hard).') + parser.add_argument('--classes', type=str, nargs='*', default=['Car', 'Pedestrian', 'Cyclist'], help='List of classes to evaluate (default: KITTI common classes). Case sensitive.') + parser.add_argument('--file_extension', type=str, default='.txt', help='Extension of the label files (default: .txt).') + + args = parser.parse_args() + + ###### Argument Validation and Setup ###### + if not os.path.isdir(args.gt_dir): + print(f"[LOG] Error: Ground truth directory not found: {args.gt_dir}") + return + # Check prediction directories *before* loading GT + valid_pred_dirs = [] + original_detector_names = args.detector_names if args.detector_names else [os.path.basename(d.rstrip('/\\')) for d in args.pred_dirs] + final_detector_names = [] + pred_dir_map = {} # Map name back to directory path + + for i, d in enumerate(args.pred_dirs): + detector_name = original_detector_names[i] + if not os.path.isdir(d): + print(f"[LOG] Prediction directory not found: {d}. 
Skipping detector '{detector_name}'.") + else: + valid_pred_dirs.append(d) + final_detector_names.append(detector_name) + pred_dir_map[detector_name] = d + + if not valid_pred_dirs: + print("[LOG] Error: No valid prediction directories provided.") + return + + # Ensure output directory exists + try: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + print(f"[LOG] Created output directory: {args.output_dir}") + except OSError as e: + print(f"[LOG] Error creating output directory {args.output_dir}: {e}") + return + + print("[LOG] Configuration") + print(f"Ground Truth Dir: {args.gt_dir}") + print(f"Prediction Dirs: {valid_pred_dirs}") # Show only valid dirs being used + print(f"Detector Names: {final_detector_names}") # Show corresponding names + print(f"Output Dir: {args.output_dir}") + print(f"IoU Threshold: {args.iou_threshold}") + print(f"Classes: {args.classes}") + print(f"File Extension: {args.file_extension}") + print("=====================\n") + + ###### Load Data ###### + print("[LOG] Loading data...") + try: + gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(args.file_extension)]) + if not gt_files: + print(f"[LOG] No ground truth files found in {args.gt_dir} with extension {args.file_extension}. Cannot evaluate.") + return + + print(f"Found {len(gt_files)} ground truth files.") + gt_boxes_all_samples = {} # {sample_id: {class: [boxes]}} + # Load GT data first + print("Loading Ground Truth...") + for filename in gt_files: + sample_id = os.path.splitext(filename)[0] + gt_file_path = os.path.join(args.gt_dir, filename) + gt_boxes_all_samples[sample_id] = load_labels(gt_file_path, is_gt=True) + print("Ground Truth loaded.") + + # Load prediction data for valid detectors + pred_boxes_all_detectors = {} # {detector_name: {sample_id: {class: [boxes]}}} + print("Loading Predictions...") + for detector_name in final_detector_names: + pred_dir = pred_dir_map[detector_name] # Get path from map + print(f" Loading from {detector_name} ({pred_dir})...") + pred_boxes_all_detectors[detector_name] = {} + files_found_count = 0 + for filename in gt_files: # Iterate based on GT files for consistency + sample_id = os.path.splitext(filename)[0] + pred_file_path = os.path.join(pred_dir, filename) + # load_labels handles non-existent files, returns empty dict + pred_boxes_all_detectors[detector_name][sample_id] = load_labels(pred_file_path, is_gt=False) + if os.path.exists(pred_file_path): + files_found_count += 1 + print(f" Found {files_found_count} matching prediction files for {detector_name}.") + print("Predictions loaded.") + + print(f"\n[LOG] Loaded data for {len(gt_files)} samples and {len(final_detector_names)} detector(s).\n") + + except Exception as e: + print(f"[LOG] Error during data loading: {e}") + traceback.print_exc() + return + + ###### Evaluation ###### + print("[LOG] Evaluating detectors...") + results_all_detectors = {} # {detector_name: {class: {'ap':, 'p':, 'r':, 'gt':, 'pred':}}} + try: + for detector_name in final_detector_names: # Use the filtered list of names + print(f" Evaluating: {detector_name}") + # Check if prediction data was actually loaded for this detector + if detector_name not in pred_boxes_all_detectors: + print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") + results_all_detectors[detector_name] = {} # Store empty results + continue + + pred_boxes_all_samples = pred_boxes_all_detectors[detector_name] + + if not gt_boxes_all_samples: # Redundant check, but safe + print(f" Skipping 
{detector_name} - No ground truth data loaded.") + results_all_detectors[detector_name] = {} + continue + + # Perform evaluation + results_by_class = evaluate_detector( + gt_boxes_all_samples, + pred_boxes_all_samples, + args.classes, # Pass the user-specified classes + args.iou_threshold + ) + results_all_detectors[detector_name] = results_by_class + print("[LOG] Evaluation complete.\n") + except Exception as e: + print(f"[LOG] Error during evaluation: {e}") + traceback.print_exc() + return # Stop execution on evaluation error + + + ###### Report Results & Save ###### + print("[LOG] Results") + results_file_path = os.path.join(args.output_dir, 'evaluation_metrics.txt') + + try: + with open(results_file_path, 'w') as f: + f.write(f"Evaluation Results (IoU Threshold: {args.iou_threshold})\n") + f.write(f"Evaluated Classes: {', '.join(args.classes)}\n") + f.write("="*60 + "\n") + + overall_mAPs = {} # Store mAP for each detector {detector_name: mAP_value} + + # Process results for each detector that was evaluated + for detector_name in final_detector_names: + # Check if results exist for this detector (might be skipped if loading failed) + if detector_name not in results_all_detectors: + print(f"\nDetector: {detector_name} (SKIPPED - No results)") + f.write(f"\nDetector: {detector_name} (SKIPPED - No results)\n") + continue # Skip to the next detector + + results_by_class = results_all_detectors[detector_name] + print(f"\nDetector: {detector_name}") + f.write(f"\nDetector: {detector_name}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10}\n") + f.write("-" * 55 + "\n") + + detector_aps = [] # AP values for classes with GT > 0 for this detector + evaluated_classes_with_gt = [] # Class names with GT > 0 for this detector + + # Iterate through the classes specified by the user for consistent reporting + for cls in args.classes: + # Check if GT data exists for this class across all samples + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + if cls in results_by_class: + # Results exist for this class and detector + res = results_by_class[cls] + ap = res['ap'] + num_gt = res['num_gt'] # Should match num_gt_total_for_class if evaluated correctly + num_pred = res['num_pred'] # Number of valid predictions for this class + print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10}") + f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10}\n") + # Include in mAP calculation only if there were GT boxes for this class + if num_gt > 0: + detector_aps.append(ap) + evaluated_classes_with_gt.append(cls) + # If num_gt is 0 for a class, its AP is 0 or NaN, not included in mAP. + else: + # No results entry for this class, implies 0 valid predictions processed. + # Report AP as 0.0000 if GT existed, otherwise N/A. + print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10}") + f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10}\n") + # If GT existed, this class contributes 0 to the mAP average. 
+ if num_gt_total_for_class > 0: + detector_aps.append(0.0) + evaluated_classes_with_gt.append(cls) + + # Calculate and report mAP for this detector based on evaluated classes with GT + if detector_aps: # Check if list is not empty + mAP = np.mean(detector_aps) + overall_mAPs[detector_name] = mAP + # Report which classes contributed to the mAP + mAP_info = f"(Classes w/ GT: {', '.join(evaluated_classes_with_gt)})" if evaluated_classes_with_gt else "" + print("-" * 55) + print(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}") + f.write("-" * 55 + "\n") + f.write(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}\n") + else: + overall_mAPs[detector_name] = np.nan # Indicate mAP couldn't be computed + print("-" * 55) + print(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}") + f.write("-" * 55 + "\n") + f.write(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}\n") + + + ###### Overall Comparison Table ###### + # Check if there are results to compare + if len(final_detector_names) > 0 and results_all_detectors: + print("\n[LOG] Overall Class AP Comparison") + f.write("\n" + "="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + # Truncate long detector names for table header + header_names = [name[:12] for name in final_detector_names] # Limit name length + header = f"{'Class':<15}" + "".join([f" | {name:<12}" for name in header_names]) + print(header) + f.write(header + "\n") + print("-" * len(header)) + f.write("-" * len(header) + "\n") + + # Iterate through user-specified classes for the table rows + for cls in args.classes: + aps_str = "" + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + # Get AP for each detector for this class + for detector_name in final_detector_names: + # Retrieve AP using .get chains safely + ap_val = results_all_detectors.get(detector_name, {}).get(cls, {}).get('ap', np.nan) + + # Decide display value based on AP and total GT count + if np.isnan(ap_val): + # Show 0.0000 if GT existed, else N/A + display_val = f"{0.0:<12.4f}" if num_gt_total_for_class > 0 else f"{'N/A':<12}" + else: + # Format valid AP value + display_val = f"{ap_val:<12.4f}" + aps_str += f" | {display_val}" + + line = f"{cls:<15}" + aps_str + print(line) + f.write(line + "\n") + + ###### Overall mAP Comparison ###### + print("-" * len(header)) + f.write("-" * len(header) + "\n") + map_str = "" + for detector_name in final_detector_names: + map_val = overall_mAPs.get(detector_name, np.nan) # Get calculated mAP + map_str += f" | {map_val:<12.4f}" if not np.isnan(map_val) else f" | {'N/A':<12}" + map_line = f"{'mAP':<15}" + map_str + print(map_line) + f.write(map_line + "\n") + else: + print("\n[LOG] Skipping overall comparison table - no results available.") + f.write("\n[LOG] Skipping overall comparison table - no results available.\n") + + + print(f"\nMetrics saved to: {results_file_path}") + + except Exception as e: + print(f"[LOG] Error during results reporting/saving: {e}") + traceback.print_exc() + # Continue to plotting if possible + # return # Or stop here + + ###### Plotting ###### + print("\n[LOG] Generating PR curves...") + try: + # Check if results_all_detectors has data before plotting + if results_all_detectors and any(results_all_detectors.values()): + # Pass only valid detector results to plotting function + # Filter out detectors that were skipped or had errors resulting in no results dict + valid_results = {k: v for k, v in results_all_detectors.items() if isinstance(v, dict) and k in 
final_detector_names} + if valid_results: + plot_pr_curves(valid_results, args.classes, args.output_dir) + print(f"[LOG] PR curve plots potentially saved in: {args.output_dir}") + else: + print("[LOG] Skipping plotting - no valid evaluation results were generated.") + else: + print("[LOG] Skipping plotting - no evaluation results generated.") + + except Exception as e: + print(f"Error during plotting: {e}") + traceback.print_exc() + + print("\n[LOG] Evaluation finished.") + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py b/GEMstack/onboard/perception/sensorFusion/rosbag_processor.py old mode 100755 new mode 100644 similarity index 100% rename from GEMstack/onboard/perception/sensorFusionDataPrep/rosbag_processor.py rename to GEMstack/onboard/perception/sensorFusion/rosbag_processor.py From dab9d186306f08a0e0ad2fb91de7c562d8919303 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 27 Apr 2025 03:34:30 -0500 Subject: [PATCH 09/55] Finish eval_3d_bbox_performance. Added mAP scores, plot_pr_curves(), evaluate_detector() --- .../sensorFusion/eval_3d_bbox_performance.py | 236 +++++++++++++++++- 1 file changed, 228 insertions(+), 8 deletions(-) diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py index 577b27940..58b14cb92 100644 --- a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py +++ b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py @@ -9,7 +9,7 @@ import warnings import traceback -###### Configuration ###### +###### Bbox coord mapping ###### IDX_X, IDX_Y, IDX_Z = 0, 1, 2 IDX_L, IDX_W, IDX_H = 3, 4, 5 IDX_YAW = 6 @@ -49,8 +49,8 @@ def parse_box_line(line, is_gt=False): # Read rotation_y (yaw) from KITTI standard index 14 rot_y = float(parts[14]) - # --- Convert to standardized internal format --- - # Internal Standardized format: [cx, cy, cz, l, w, h, yaw, class_label, score] + # Convert to internal format + # Internal format: [cx, cy, cz, l, w, h, yaw, class_label, score] # Use KITTI loc_x and loc_z directly for internal cx, cz cx = loc_x @@ -122,11 +122,9 @@ def calculate_3d_iou(box1, box2): Returns: float: The 3D IoU value. - *** PLACEHOLDER IMPLEMENTATION *** - This function needs a proper implementation for ROTATED 3D boxes. - The current version calculates IoU based on axis-aligned bounding boxes - derived from the dimensions and centers, which is inaccurate for rotated boxes. 
+ Doesn't consider yaw """ + ####### Simple Axis-Aligned Bounding Box (AABB) IoU ####### def get_aabb_corners(box): # Uses internal format where cy is geometric center @@ -167,7 +165,229 @@ def get_aabb_corners(box): iou = max(0.0, min(iou, 1.0)) return iou -# TODO: calc mAP + +def calculate_ap(precision, recall): + """Calculates Average Precision using the PASCAL VOC method (area under monotonic PR curve).""" + # Convert to numpy arrays first for safety + if not isinstance(precision, (list, np.ndarray)) or not isinstance(recall, (list, np.ndarray)): + return 0.0 + precision = np.array(precision) + recall = np.array(recall) + + if precision.size == 0 or recall.size == 0: + return 0.0 + + # Prepend/Append points for interpolation boundaries + recall = np.concatenate(([0.], recall, [1.0])) + precision = np.concatenate(([0.], precision, [0.])) # Start with 0 precision at recall 0, end with 0 at recall 1 + + # Make precision monotonically decreasing (handles PR curve 'jiggles') + for i in range(len(precision) - 2, -1, -1): + precision[i] = max(precision[i], precision[i+1]) + + # Find indices where recall changes (avoids redundant calculations) + indices = np.where(recall[1:] != recall[:-1])[0] + 1 + + # Compute AP using the area under the curve (sum of rectangle areas) + ap = np.sum((recall[indices] - recall[indices-1]) * precision[indices]) + return ap + +def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold): + """Evaluates a single detector's predictions against ground truth.""" + results_by_class = {} + sample_ids = list(gt_boxes_all_samples.keys()) # Get fixed order of sample IDs + + for cls in classes: + all_pred_boxes_cls = [] + num_gt_cls = 0 + pred_sample_indices = [] # Store index from sample_ids for each prediction + + # Collect all GTs and Preds for this class across samples + for i, sample_id in enumerate(sample_ids): + # Use .get() with default empty dict/list for safety + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + pred_boxes = pred_boxes_all_samples.get(sample_id, {}).get(cls, []) + + num_gt_cls += len(gt_boxes) + for box in pred_boxes: + all_pred_boxes_cls.append(box) + pred_sample_indices.append(i) # Store the original sample index + + if not all_pred_boxes_cls: # Handle case with no predictions for this class + results_by_class[cls] = { + 'precision': np.array([]), # Use empty numpy arrays + 'recall': np.array([]), + 'ap': 0.0, + 'num_gt': num_gt_cls, + 'num_pred': 0 + } + continue # Skip to next class + + # Sort detections by confidence score (descending) + # Ensure scores exist and are numeric before sorting + scores = [] + valid_indices_for_sorting = [] + for idx, box in enumerate(all_pred_boxes_cls): + if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): + scores.append(-box[IDX_SCORE]) # Use negative score for descending sort with argsort + valid_indices_for_sorting.append(idx) + else: + warnings.warn(f"Class {cls}: Prediction missing score or invalid score type. 
Excluding from evaluation: {box}") + + if not valid_indices_for_sorting: # If filtering removed all boxes + results_by_class[cls] = {'precision': np.array([]),'recall': np.array([]),'ap': 0.0,'num_gt': num_gt_cls,'num_pred': 0} + continue + + # Filter lists based on valid scores + all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in valid_indices_for_sorting] + pred_sample_indices = [pred_sample_indices[i] for i in valid_indices_for_sorting] + # Scores list is already built correctly + + # Get the sorted order based on scores + sorted_indices = np.argsort(scores) # argsort sorts ascending on negative scores -> descending order of original scores + + # Reorder the lists based on sorted scores + all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in sorted_indices] + pred_sample_indices = [pred_sample_indices[i] for i in sorted_indices] + + + tp = np.zeros(len(all_pred_boxes_cls)) + fp = np.zeros(len(all_pred_boxes_cls)) + # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False + gt_matched = defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx + + # Match predictions + for det_idx, pred_box in enumerate(all_pred_boxes_cls): + sample_idx = pred_sample_indices[det_idx] # Get the original sample index (0 to num_samples-1) + sample_id = sample_ids[sample_idx] # Get the sample_id string using the index + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + + best_iou = -1.0 + best_gt_idx = -1 # Index relative to gt_boxes for this sample/class + + if not gt_boxes: # No GT for this class in this specific sample + fp[det_idx] = 1 + continue + + for gt_idx, gt_box in enumerate(gt_boxes): + # Explicitly check class match (belt-and-suspenders) + if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: + iou = calculate_3d_iou(pred_box, gt_box) + if iou > best_iou: + best_iou = iou + best_gt_idx = gt_idx + # else: # Should not happen if inputs are correctly filtered by class + # pass + + + if best_iou >= iou_threshold: + # Check if this GT box was already matched *in this sample* + if not gt_matched[sample_idx].get(best_gt_idx, False): + tp[det_idx] = 1 + gt_matched[sample_idx][best_gt_idx] = True # Mark as matched for this sample + else: + fp[det_idx] = 1 # Matched a GT box already covered by a higher-scored prediction + else: + fp[det_idx] = 1 # Did not match any available GT box with sufficient IoU + + # Calculate precision/recall + fp_cumsum = np.cumsum(fp) + tp_cumsum = np.cumsum(tp) + + # Avoid division by zero if num_gt_cls is 0 + recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) + + # Avoid division by zero if no predictions were made or matched (tp + fp = 0) + denominator = tp_cumsum + fp_cumsum + precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) + + + ap = calculate_ap(precision, recall) + + results_by_class[cls] = { + 'precision': precision, # Store as numpy arrays + 'recall': recall, + 'ap': ap, + 'num_gt': num_gt_cls, + 'num_pred': len(all_pred_boxes_cls) # Number of predictions *with valid scores* + } + + return results_by_class + + +def plot_pr_curves(results_all_detectors, classes, output_dir): + """Plots Precision-Recall curves for each class.""" + if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except OSError as e: + print(f"[LOG] Error creating output directory {output_dir} for plots: {e}") + return # Cannot save plots + + detector_names = list(results_all_detectors.keys()) + + for cls in classes: + 
plt.figure(figsize=(10, 7)) + any_results_for_class = False # Track if any detector had results for this class + + for detector_name, results_by_class in results_all_detectors.items(): + if cls in results_by_class and results_by_class[cls]['num_pred'] > 0 : # Check if there were predictions + res = results_by_class[cls] + precision = res['precision'] + recall = res['recall'] + ap = res['ap'] + + # Ensure plotting works even if precision/recall are empty arrays + if recall.size > 0 and precision.size > 0: + # Prepend a point for plotting nicely from recall=0 + plot_recall = np.concatenate(([0.], recall)) + # Use precision[0] if available, else 0. + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', label=f'{detector_name} (AP={ap:.3f})') + any_results_for_class = True + else: # Handle case where num_pred > 0 but P/R arrays somehow ended up empty + plt.plot([0], [0], marker='s', markersize=5, linestyle='', label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') + any_results_for_class = True # Still mark as having results + + + elif cls in results_by_class: # Class exists in evaluation, but no predictions were made for it + num_gt = results_by_class[cls]['num_gt'] + if num_gt > 0: + # Plot a marker indicating no predictions were made for this GT class + plt.plot([0], [0], marker='x', markersize=6, linestyle='', label=f'{detector_name} (No Pred, GT={num_gt})') + # else: # No GT and no predictions for this class, don't plot anything specific + # pass + # else: # Class not even in results dict for this detector (e.g., error during eval?) + # Could happen if detector had no files or all files failed parsing for this class + # Might indicate an issue, but avoid cluttering plot unless needed. 
+ pass + + + if any_results_for_class: + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'Precision-Recall Curve for Class: {cls}') + plt.legend(loc='lower left') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + plot_path = os.path.join(output_dir, f'pr_curve_{cls}.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated PR curve: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving PR curve plot for class '{cls}': {e}") + finally: + plt.close() # Close the figure regardless of save success + else: + # Check if there was any GT data for this class across all detectors + # Use .get() chain safely + num_gt_total = sum(results_by_class.get(cls, {}).get('num_gt', 0) for results_by_class in results_all_detectors.values()) + if num_gt_total > 0: + print(f" Skipping PR plot for class '{cls}': No predictions found across detectors (GT={num_gt_total}).") + else: + print(f" Skipping PR plot for class '{cls}': No ground truth found.") + plt.close() # Close the empty figure def main(): parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') From d393daa470fa20a00092809b5b5168d3cbfd83b6 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 27 Apr 2025 03:35:40 -0500 Subject: [PATCH 10/55] Add test_eval_3d_bbox_performance in KITTI format --- .../test_eval_3d_bbox_performance.py | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py diff --git a/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py new file mode 100644 index 000000000..6651f50bd --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py @@ -0,0 +1,165 @@ +import os +import sys +import numpy as np +import traceback +import shutil + + +eval_script_filename = "eval_3d_bbox_performance.py" +eval_module_name = "eval_3d_bbox_performance" +try: + if not os.path.exists(eval_script_filename): + raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.") + + # Attempt the standard import + imported_module = __import__(eval_module_name) + print(f"Module '{eval_module_name}' imported successfully.") + +except: + print("##################") + print(f"{eval_script_filename} import error") + print("##################") + exit(1) + +#### Generate Dummy Data in KITTI Format ##### + +def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, is_pred=False, noise_level=0.1, seed=42): + """Generates dummy data files in KITTI format.""" + if os.path.exists(base_dir): + shutil.rmtree(base_dir) # Clean previous runs + os.makedirs(base_dir) + np.random.seed(seed) # reproducibility + + for i in range(num_samples): + filename = os.path.join(base_dir, f"{i:06d}.txt") + with open(filename, 'w') as f: + num_boxes = np.random.randint(1, boxes_per_sample + 1) + for _ in range(num_boxes): + cls = np.random.choice(classes) + + # Generate somewhat box parameters + h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height + w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width + l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length + + loc_x = np.random.uniform(-15, 15) # center x (lateral) + loc_z = 
np.random.uniform(5, 50) # center z (depth) + loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin + rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw + + # Placeholder values + truncated = 0.0 + occluded = 0 # 0=visible + alpha = -10 + bbox_2d = [0.0, 0.0, 50.0, 50.0] + score = np.random.uniform(0.5, 1.0) + + # Add noise for predictions + if is_pred: + h *= np.random.normal(1, noise_level * 0.1) + w *= np.random.normal(1, noise_level * 0.1) + l *= np.random.normal(1, noise_level * 0.1) + loc_x += np.random.normal(0, noise_level * 1.0) + loc_y_bottom += np.random.normal(0, noise_level * 0.1) + loc_z += np.random.normal(0, noise_level * 3.0) + rot_y += np.random.normal(0, noise_level * np.pi/8) + h, w, l = max(0.1, h), max(0.1, w), max(0.1, l) # Ensure positive dimensions + + # Format the line string to KITTI standard + line_parts = [ + cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", + f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", + f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", + f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", + f"{rot_y:.2f}" + ] + + if is_pred: + line_parts.append(f"{score:.4f}") + + f.write(" ".join(line_parts) + "\n") + +# --- Setup Directories --- +base_output_dir = "./kitti_eval_demo" +gt_dir = os.path.join(base_output_dir, "dummy_gt_kitti") +pred1_dir = os.path.join(base_output_dir, "dummy_pred1_kitti") +pred2_dir = os.path.join(base_output_dir, "dummy_pred2_kitti") +eval_output_dir = os.path.join(base_output_dir, "eval_output") + +# --- Generate KITTI-formatted Dummy Data --- +print("\n[LOG] Generating dummy data (KITTI format) for demonstration...") +try: + create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False, seed=42) + create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, noise_level=0.1, seed=101) + create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, noise_level=0.3, seed=202) + missing_pred_file = os.path.join(pred2_dir, "000004.txt") + if os.path.exists(missing_pred_file): + os.remove(missing_pred_file) + print(f"Removed {missing_pred_file} to simulate missing prediction.") + print("Dummy data generated.") +except Exception as e: + print(f"Error generating dummy data: {e}") + traceback.print_exc() + exit(1) + +# --- Run the Evaluation Script --- +print(f"\n[LOG] #### Run Evaluation ") +cmd_args = [ + gt_dir, + pred1_dir, + pred2_dir, + eval_output_dir, + '--detector_names', 'DetectorA_Imported', 'DetectorB_Imported', + '--iou_threshold', '0.7', + '--classes', 'Car', 'Pedestrian' +] + + +original_argv = sys.argv +sys.argv = [eval_script_filename] + cmd_args +exit_code = 0 + +try: + print(f"[LOG] call {eval_module_name}.main()...") + imported_module.main() + +except AttributeError: + exit_code = 1 +except Exception as e: + traceback.print_exc() + exit_code = 1 +finally: + # Restore original sys.argv + sys.argv = original_argv + +##### List generated output files ##### +print("\n--- Generated Output Files ---") +if os.path.exists(eval_output_dir): + try: + output_files = [os.path.join(eval_output_dir, item) for item in os.listdir(eval_output_dir)] + if output_files: + for item_path in sorted(output_files): # Sort for consistent listing + print(item_path) + else: + print(f"Output directory '{eval_output_dir}' is empty.") + except Exception as e: + print(f"Error listing output directory {eval_output_dir}: {e}") +else: + print(f"Output directory 
'{eval_output_dir}' not created or accessible.") + +# Display contents of the metrics file if execution was successful +metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') +if exit_code == 0 and os.path.exists(metrics_file): + print(f"\n--- Content of {metrics_file} ---") + try: + with open(metrics_file, 'r') as f: + print(f.read()) + except Exception as e: + print(f"Error reading metrics file {metrics_file}: {e}") +elif exit_code != 0: + # Use syntax compatible with Python < 3.8 + print(f"\n--- Metrics file not displayed due to execution error (exit_code={exit_code}) ---") +elif not os.path.exists(metrics_file): + print(f"\n--- Metrics file not found: {metrics_file} ---") + +print(f"\nDemo finished. Check the '{base_output_dir}' directory for generated data and results.") \ No newline at end of file From f22543342951c9719fadb842efb5d387df6d1bd5 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 06:02:22 -0500 Subject: [PATCH 11/55] Initial attempt at combining pointpillars with lidar. Ran into package versioning issues with klampt and numpy --- .../onboard/perception/combined_detection.py | 664 ++++++++++++++++++ launch/combined_detection.yaml | 132 ++++ 2 files changed, 796 insertions(+) create mode 100644 GEMstack/onboard/perception/combined_detection.py create mode 100644 launch/combined_detection.yaml diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py new file mode 100644 index 000000000..59eb369f5 --- /dev/null +++ b/GEMstack/onboard/perception/combined_detection.py @@ -0,0 +1,664 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from .perception_utils import * +from ultralytics import YOLO +import cv2 +from typing import Dict +import open3d as o3d +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os +import yaml + +# PointPillars imports: +import torch + +# Import compatibility layer first +try: + import numpy_compat # This will patch numpy before other imports +except ImportError: + # Create inline patch if file doesn't exist + import numpy as np + if not hasattr(np, 'long'): + np.long = np.int64 + +# Now try importing PointPillars +try: + from pointpillars.model import PointPillars +except ImportError as e: + # Provide a fallback + print(f"Warning: Failed to import PointPillars: {e}") + # Define a dummy PointPillars class to prevent further errors + class PointPillars: + def __init__(self, *args, **kwargs): + raise NotImplementedError("PointPillars failed to import correctly") + + +class CombinedDetector3D(Component): + """ + Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud + data by painting the points. Pedestrians are also detected with PointPillars + on the point cloud. The resulting 3D bounding boxes of each are fused together + with late sensor fusion. + + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + + Supports multiple cameras; each camera’s intrinsics and extrinsics are + loaded from a single YAML calibration file via plain PyYAML. 
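+
+    The calibration file is expected to follow the layout sketched below
+    (mirroring the inline comment in __init__; "front" is just an example
+    camera name):
+
+        cameras:
+          front:
+            K: [[...], [...], [...]]     # 3x3 intrinsics
+            D: [...]                     # distortion coefficients
+            T_l2c: [[...], ..., [...]]   # 4x4 lidar-to-camera extrinsics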
+ """ + + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + T_l2v: list = None, + save_data: bool = True, + orientation: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.ped_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Config flags + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.orientation = orientation + self.use_start_frame = use_start_frame + + # 1) Load lidar→vehicle transform + if T_l2v is not None: + self.T_l2v = np.array(T_l2v) + else: + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0.0, 0.0, 0.0, 1.0] + ]) + + # 2) Load camera intrinsics/extrinsics from the supplied YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + + # Expect structure: + # cameras: + # front: + # K: [[...], [...], [...]] + # D: [...] + # T_l2c: [[...], ..., [...]] + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + + # Derived transforms + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name=='front') + + def rate(self) -> float: + return 8 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=500, slop=0.05) + self.sync.registerCallback(self.synchronized_callback) + + # Initialize the YOLO detector + self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + + # Initialize PointPillars + self.pointpillars = PointPillars( + nclasses=3, + voxel_size=[0.16, 0.16, 4], + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + max_num_points=32, + max_voxels=(16000, 40000) + ) + + model_path = 'GEMstack/knowledge/detection/epoch_160.pth' + if torch.cuda.is_available(): + checkpoint = torch.load(model_path) + self.pointpillars.load_state_dict(checkpoint['model_state_dict']) + self.pointpillars.to('cuda') + else: + checkpoint = torch.load(model_path, map_location=torch.device('cpu')) + self.pointpillars.load_state_dict(checkpoint['model_state_dict']) + self.pointpillars.eval() # Set to evaluation mode + rospy.loginfo("PointPillars model loaded successfully") + + # Initialize the calibration matrices: + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + 
self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + step1 = time.time() + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr("Failed to convert image: {}".format(e)) + self.latest_image = None + step2 = time.time() + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + step3 = time.time() + # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None or self.undistort_map2 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, + newCameraMatrix=newK, size=(w, h), + m1type=cv2.CV_32FC1) + + start = time.time() + undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) + end = time.time() + # print('--------undistort', end-start) + return undistorted, newK + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + downsample = False + # Gate guards against data not being present for both sensors: + if self.latest_image is None or self.latest_lidar is None: + return {} + lastest_image = self.latest_image.copy() + + # Set up current time variables + start = time.time() + current_time = self.vehicle_interface.time() + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + if self.start_time is None: + self.start_time = current_time + time_elapsed = current_time - self.start_time + + # Ensure data/ exists and build timestamp + if self.save_data: + self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) + + if self.camera_front == False: + start = time.time() + undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + end = time.time() + # print('-------processing time undistort_image---', end -start) + self.current_K = current_K + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + else: + img_normal = lastest_image.copy() + undistorted_img = lastest_image.copy() + orig_H, orig_W = lastest_image.shape[:2] + self.current_K = self.K + results_normal = self.detector(img_normal, conf=0.35, classes=[0]) + combined_boxes = [] + if not self.enable_tracking: + self.ped_counter = 0 + if self.orientation: + img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) + img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) + results_left = self.detector(img_left, conf=0.1, classes=[0]) + results_right = self.detector(img_right, conf=0.1, classes=[0]) + boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] + boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + for box in boxes_left: + cx, cy, w, h = box + new_cx = cy + new_cy = orig_W - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) + for box in boxes_right: + cx, cy, w, h = box + new_cx = orig_H - 1 - cy + new_cy = cx + combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + for box in boxes_normal: + cx, cy, w, h = box + combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + # 
Visualize the received images in 2D with their corresponding labels + # It draws rectangles and labels on the images: + if getattr(self, 'visualize_2d', False): + for (cx, cy, w, h, activity) in combined_boxes: + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + if activity == AgentActivityEnum.STANDING: + color = (255, 0, 0) + label = "STANDING" + elif activity == AgentActivityEnum.RIGHT: + color = (0, 255, 0) + label = "RIGHT" + elif activity == AgentActivityEnum.LEFT: + color = (0, 0, 255) + label = "LEFT" + else: + color = (255, 255, 255) + label = "UNKNOWN" + cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) + cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) + cv2.imshow("Detection - Cone 2D", undistorted_img) + + start = time.time() + # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. + # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + # What is returned: + # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) + # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) + # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + + # POINTPILLARS TIES IN HERE + pointpillars_detections = [] + # Get the LiDAR points (X, Y, Z) from the projected points + lidar_points = projected_pts[:, 2:5] # Lidar points are in the Lidar coordinate frame + + with torch.no_grad(): + # Convert to tensor and format for PointPillars + lidar_tensor = torch.from_numpy(lidar_points).float() + if torch.cuda.is_available(): + lidar_tensor = lidar_tensor.cuda() + + # Format as batch with a single point cloud + batched_pts = [lidar_tensor] + + # Get PointPillars predictions + results = self.pointpillars(batched_pts, mode='test') + + if results and len(results) > 0 and 'lidar_bboxes' in results[0]: + # Process PointPillars results + pp_bboxes = results[0]['lidar_bboxes'] + pp_labels = results[0]['labels'] + pp_scores = results[0]['scores'] + + for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): + # Skip low confidence detections + if score < 0.5: # Adjust threshold as needed + continue + + # Extract center position and dimensions + x, y, z, l, w, h, yaw = bbox + center = np.array([x, y, z]) + dims = (l, w, h) + + # Convert to vehicle coordinates + center_hom = np.append(center, 1) + center_vehicle_hom = self.T_l2v @ center_hom + center_vehicle = center_vehicle_hom[:3] + + # Create rotation matrix and convert to Euler angles + R_lidar = R.from_euler('z', yaw).as_matrix() + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + veh_yaw, veh_pitch, veh_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + + # Transform to appropriate frame (START or CURRENT) + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + xp, yp, zp = (T_vehicle_to_start @ np.append(center_vehicle, 1))[:3] + out_frame = ObjectFrameEnum.START + else: + xp, yp, zp = center_vehicle + out_frame = ObjectFrameEnum.CURRENT + + # Determine agent activity based on label + # 
Map PointPillars class indices to your activities + if label == 0: # Adjust mapping as needed + activity = AgentActivityEnum.STANDING + elif label == 1: + activity = AgentActivityEnum.MOVING + else: + activity = AgentActivityEnum.UNKNOWN + + # Create pose and record detection + pp_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=veh_yaw, pitch=veh_pitch, roll=veh_roll, + frame=out_frame + ) + + pointpillars_detections.append({ + 'pose': pp_pose, + 'dimensions': dims, + 'activity': activity, + 'score': score + }) + + rospy.loginfo(f"PointPillars detected {len(pointpillars_detections)} objects") + + + end = time.time() + # print('-------processing time1---', end -start) + + agents = {} + + for i, box_info in enumerate(combined_boxes): + # Calculate the 2D bounding box in the image + cx, cy, w, h, activity = box_info + left = int(cx - w / 1.6) + right = int(cx + w / 1.6) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + + # Cast rays from the camera through the 2D bounding box into 3D space and capture all LiDAR + # points that fall along those rays to get the 3D representation of the detected object: + mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + roi_pts = projected_pts[mask] + + # Ignore regions of interest that don't have many points + if roi_pts.shape[0] < 5: + continue + + # Filter the points by distance and depth to remove outliers: + points_3d = roi_pts[:, 2:5] + points_3d = filter_points_within_threshold(points_3d, 40) + points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) + + # If enabled, use a cylindrical ROI around the mean point position, + # filter ground points, and depth points: + if self.use_cyl_roi: + global_filtered = filter_points_within_threshold(lidar_down, 30) + roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) + refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) + refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) + else: + refined_cluster = points_3d.copy() + # end1 = time.time() + if refined_cluster.shape[0] < 4: + continue + + # Create an Open3D point cloud with the filtered points and calculate an oriented + # bounding box to get the object's precise 3D position, dimensions, center, and + # orientation + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(refined_cluster) + obb = pcd.get_oriented_bounding_box() + refined_center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + + # Transform the object's center position from LiDAR to vehicle coordinates + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] + + # Transforms the object's rotation matrix and converts it to yaw/pitch/roll + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + refined_center = refined_center_vehicle + + if self.use_start_frame: + # Transform coordinates to the starting vehicle frame + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + 
out_frame = ObjectFrameEnum.START + else: + # Use the current vehicle frame + xp, yp, zp = refined_center + out_frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=out_frame + ) + + # --- Optional tracking --- + if self.enable_tracking: + existing_id = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, + self.tracked_agents, + distance_threshold=2.0 + ) + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + if vehicle.v < 100: + alpha = 0.1 + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_agent = AgentState( + pose=updated_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + else: + updated_agent = old_state + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"Cone{self.ped_counter}" + self.ped_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + else: + agent_id = f"Cone{self.ped_counter}" + self.ped_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.CONE, + activity=activity, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + + self.current_agents = agents + + # If tracking not enabled, return only current frame detections + if not self.enable_tracking: + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) + return self.current_agents + + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 20.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + del self.tracked_agents[agent_id] + if self.enable_tracking: + for agent_id, agent in self.tracked_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + end = time.time() + # print('-------processing time', end -start) + return self.tracked_agents + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + tstamp = int(self.vehicle_interface.time() * 1000) + # 1) Dump raw image + 
cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) + # 2) Dump raw LiDAR + np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) + # 3) Write BEFORE_TRANSFORM + with open(f"data/{tstamp}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute vehicle_start_pose in either START or CURRENT + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{tstamp}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + +# Fake 2D Combined Detector for testing purposes +class FakCombinedDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for time_range in self.times: + if t >= time_range[0] and t <= time_range[1]: + res['cone0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Cone (simulated)") + return res + + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + dims = (w, h, 0) + return AgentState(pose=pose, dimensions=dims, outline=None, + type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), yaw_rate=0) + + +if __name__ == '__main__': + pass \ No newline at end of file diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml new file mode 100644 index 000000000..a63bd86ac --- /dev/null +++ b/launch/combined_detection.yaml @@ -0,0 +1,132 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. 
+drive: + perception: + state_estimation : GNSSStateEstimator + agent_detection : + type: combined_detection.CombinedDetector3D + args: + camera_name: front #[front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml + + # optional overrides + enable_tracking: True # True if you want to enable tracking function + visualize_2d: False # True if you see 2D detection visualization + use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) + save_data: False # True if you want to save the sensor input data + orientation: True # True if you need to detect flipped cones + use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame + + perception_normalization : StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. 
Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : cone_detection.FakeConeDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' \ No newline at end of file From 9234869d31558d6824f78a9256b68e85e91f8372 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 09:49:24 -0500 Subject: [PATCH 12/55] Will start modifying code to publish a BoundBoxArray --- .../onboard/perception/lidar_detection.py | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 GEMstack/onboard/perception/lidar_detection.py diff --git a/GEMstack/onboard/perception/lidar_detection.py b/GEMstack/onboard/perception/lidar_detection.py new file mode 100644 index 000000000..17eb6ba76 --- /dev/null +++ b/GEMstack/onboard/perception/lidar_detection.py @@ -0,0 +1,172 @@ +# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +# from ..interface.gem import GEMInterface +# from ..component import Component +# from perception_utils import * +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os + +# PointPillars imports: +import torch +from pointpillars.model import PointPillars + +# Publisher imports: +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Pose, Vector3 + + +import numpy as np +import ros_numpy +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. 
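+    In addition to x, y, z, the intensity field is stacked as a fourth
+    column and a fixed x/z mask is applied before returning, so the result
+    is an (N, 4) array of [x, y, z, intensity] rows.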
+ """ + print(pc2_msg.fields) + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel(), + np.array(pc['intensity']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + +class PointPillarsNode(): + """ + Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud + data by painting the points. Pedestrians are also detected with PointPillars + on the point cloud. The resulting 3D bounding boxes of each are fused together + with late sensor fusion. + + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + """ + + def __init__( + self, + ): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.camera_name = 'front' + self.camera_front = (self.camera_name=='front') + self.initialize() + + def initialize(self): + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Initialize PointPillars node + rospy.init_node('pointpillars_box_publisher') + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=500, slop=0.05) + self.sync.registerCallback(self.synchronized_callback) + + # Initialize PointPillars + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.pointpillars = PointPillars( + nclasses=3, + voxel_size=[0.16, 0.16, 4], + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + max_num_points=32, + max_voxels=(16000, 40000) + ) + self.pointpillars.to(device) + + model_path = 'epoch_160.pth' + checkpoint = torch.load(model_path) #, map_location='cuda' if torch.cuda.is_available() else 'cpu') + self.pointpillars.load_state_dict(checkpoint) + + # if torch.cuda.is_available(): + # self.pointpillars = self.pointpillars.cuda() + + self.pointpillars.eval() + rospy.loginfo("PointPillars model loaded successfully") + + # Create bounding box publisher + self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=10) + + rospy.loginfo("PointPillars node initialized and waiting for messages.") + + def synchronized_callback(self, image_msg, lidar_msg): + rospy.loginfo("Received synchronized RGB and LiDAR messages") + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + downsample = False + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + boxes = BoundingBoxArray() + boxes.header.frame_id = 'velodyne' + boxes.header.stamp = lidar_msg.header.stamp + + pointpillars_detections = [] + with torch.no_grad(): + # Convert to tensor and format for PointPillars + lidar_tensor = torch.from_numpy(lidar_down).float() + if torch.cuda.is_available(): + lidar_tensor = lidar_tensor.cuda() + + # Format as batch with a single point cloud + batched_pts = [lidar_tensor] + + # Get PointPillars 
predictions + results = self.pointpillars(batched_pts, mode='test') + + if results and len(results) > 0 and 'lidar_bboxes' in results[0]: + # Process PointPillars results + pp_bboxes = results[0]['lidar_bboxes'] + pp_labels = results[0]['labels'] + pp_scores = results[0]['scores'] + + for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): + # Skip low confidence detections + if score < 0.5: # Adjust threshold as needed + continue + + rospy.loginfo(f"PointPillars detected {bbox} objects") + + # Extract center position and dimensions + x, y, z, l, w, h, yaw = bbox + center = np.array([x, y, z]) + dims = (l, w, h) + + pointpillars_detections.append({ + 'pose': pp_pose, + 'dimensions': dims, + 'activity': activity, + 'score': score + }) + + rospy.loginfo(f"PointPillars detected {len(pointpillars_detections)} objects") + +if __name__ == '__main__': + try: + node = PointPillarsNode() + rospy.spin() + except rospy.ROSInterruptException: + pass \ No newline at end of file From b27dd43d4500915bd7578a97b0c6209a23f66347 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 20:35:34 -0500 Subject: [PATCH 13/55] Initial PointPillars container build process --- .../onboard/perception/build_point_pillars.sh | 55 +++++++++++ .../perception/setup/Dockerfile.cuda111 | 92 +++++++++++++++++++ .../perception/setup/docker-compose.yaml | 42 +++++++++ .../perception/setup/get_nvidia_container.sh | 14 +++ 4 files changed, 203 insertions(+) create mode 100644 GEMstack/onboard/perception/build_point_pillars.sh create mode 100644 GEMstack/onboard/perception/setup/Dockerfile.cuda111 create mode 100644 GEMstack/onboard/perception/setup/docker-compose.yaml create mode 100755 GEMstack/onboard/perception/setup/get_nvidia_container.sh diff --git a/GEMstack/onboard/perception/build_point_pillars.sh b/GEMstack/onboard/perception/build_point_pillars.sh new file mode 100644 index 000000000..8b50ac246 --- /dev/null +++ b/GEMstack/onboard/perception/build_point_pillars.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Check if nvidia-docker is installed +if ! command -v nvidia-container-toolkit &> /dev/null; then + echo "NVIDIA Container Toolkit not found. Do you want to install it? (y/n)" + read -p ">" choice + if [ "$choice" = "y" ] || [ "$choice" = "Y" ]; then + echo "Installing NVIDIA Container Toolkit..." + ./setup/get_nvidia_container.sh + else + echo "NVIDIA Container Toolkit is required for GPU support." + echo "You can install it later by running ./get_nvidia_container.sh" + fi +fi + +# Check if the point_pillars_node.py and model weights exist +if [ ! -f "point_pillars_node.py" ]; then + echo "ERROR: point_pillars_node.py not found in the current directory!" + echo "Please place your point_pillars_node.py file in the same directory as this script." + exit 1 +fi + +if [ ! -f "epoch_160.pth" ]; then + echo "WARNING: epoch_160.pth model weights not found in the current directory!" + echo "Please place your model weights file in the same directory as this script." + echo "Continue anyway? (y/n)" + read -p ">" choice + if [ "$choice" != "y" ] && [ "$choice" != "Y" ]; then + exit 1 + fi +fi + +echo "Building Point Pillars Docker container..." +export DOCKERFILE=setup/Dockerfile.cuda111 + +# Using sudo to handle permissions +MY_UID=$(id -u) +MY_GID=$(id -g) + +# Attempt to use docker-compose directly, then with sudo if needed (if you uncomment it) +if ! 
docker compose -f setup/docker-compose.yaml build; then + echo "Uncomment these lines if you wish to use sudo to build container as backup" + # echo "Using sudo to build the container..." + # sudo -E docker compose -f setup/docker-compose.yaml build +fi + +echo "Build complete. To start the container, run:" +echo "docker compose -f setup/docker-compose.yaml up" +echo "" +echo "Or with sudo if you have permission issues:" +echo "sudo docker compose -f setup/docker-compose.yaml up" +echo "" +echo "To run in detached mode (background):" +echo "docker compose -f setup/docker-compose.yaml up -d" +echo "Or: sudo docker compose -f setup/docker-compose.yaml up -d" diff --git a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 new file mode 100644 index 000000000..6b901b289 --- /dev/null +++ b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 @@ -0,0 +1,92 @@ +FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 + +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG USER=ppuser + +ENV DEBIAN_FRONTEND=noninteractive + +# Use bash instead of sh +SHELL ["/bin/bash", "-c"] + +# Install basic dependencies +RUN apt-get update && apt-get install -y git python3 python3-pip wget zstd sudo ninja-build + +# Set time zone non-interactively +ENV TZ=America/Chicago +RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \ + && echo $TZ > /etc/timezone \ + && apt-get update && apt-get install -y tzdata \ + && rm -rf /var/lib/apt/lists/* + +# Install ROS Noetic +RUN apt-get update && apt-get install -y lsb-release gnupg2 +RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu focal main" > /etc/apt/sources.list.d/ros-latest.list' +RUN wget https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc +RUN apt-key add ros.asc +RUN apt-get update +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y ros-noetic-desktop +RUN apt-get install -y python3-rosdep python3-rosinstall python3-rosinstall-generator python3-wstool build-essential python3-catkin-tools + +# Install JSK recognition messages package +RUN apt-get install -y ros-noetic-jsk-recognition-msgs + +RUN rosdep init +RUN rosdep update + +# Create a non-root user safely (with fixed username to avoid conflicts) +RUN groupadd -g ${USER_GID} ${USER} || echo "Group with GID ${USER_GID} already exists" +RUN useradd -l -m -u ${USER_UID} -g ${USER_GID} ${USER} || echo "User with UID ${USER_UID} already exists" +RUN echo "${USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Fix permissions for Python packages +RUN chown -R ${USER}:${USER} /usr/local/lib/python3.8/dist-packages/ \ + && chmod -R u+rw /usr/local/lib/python3.8/dist-packages/ + +# Create workspace directory +RUN mkdir -p /home/${USER}/pointpillars_ws + +# Install PointPillars dependencies +RUN pip3 install rospkg catkin_pkg empy + +# Set CUDA architecture flags explicitly +ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6" + +# Clone PointPillars repository +WORKDIR /home/${USER}/pointpillars_ws +RUN git clone https://github.com/zhulf0804/PointPillars.git +WORKDIR /home/${USER}/pointpillars_ws/PointPillars + +# Install PyTorch with exact version specified in requirements +RUN pip3 install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 \ + -f https://download.pytorch.org/whl/torch_stable.html + +# Install all other dependencies from requirements.txt except open3d +RUN pip3 install numba==0.48.0 numpy==1.19.5 PyYAML==6.0 setuptools==58.0.4 tensorboard tqdm==4.62.3 +RUN pip3 install opencv-python==4.5.5.62 + +# Install Open3D directly using a 
specific wheel for Python 3.8 +RUN pip3 install --no-cache-dir --pre https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl || \ + pip3 install --no-cache-dir open3d==0.13.0 + +# Build and install PointPillars +RUN python3 setup.py build_ext --inplace +RUN pip3 install . + +# Add ROS paths to bashrc +RUN echo "source /opt/ros/noetic/setup.bash" >> /home/${USER}/.bashrc + +# Create startup script +RUN echo '#!/bin/bash' > /home/${USER}/start_node.sh \ + && echo 'cd /home/${USER}/pointpillars_ws' >> /home/${USER}/start_node.sh \ + && echo 'python3 point_pillars_node.py' >> /home/${USER}/start_node.sh \ + && chmod +x /home/${USER}/start_node.sh + +# Set ownership of the workspace +RUN chown -R ${USER}:${USER} /home/${USER} + +USER ${USER} +WORKDIR /home/${USER}/pointpillars_ws + +# Command to run when the container starts +CMD ["/bin/bash", "-c", "/home/${USER}/start_node.sh"] diff --git a/GEMstack/onboard/perception/setup/docker-compose.yaml b/GEMstack/onboard/perception/setup/docker-compose.yaml new file mode 100644 index 000000000..0e971dfa6 --- /dev/null +++ b/GEMstack/onboard/perception/setup/docker-compose.yaml @@ -0,0 +1,42 @@ +version: '3.9' + +services: + point-pillars: + image: point_pillars + container_name: point_pillars_container + build: + context: .. + dockerfile: ${DOCKERFILE:-setup/Dockerfile.cuda111} # Default to cuda111 if not specified + args: + USER: ppuser # Fixed username to avoid conflicts + USER_UID: ${UID:-1000} # Pass host UID + USER_GID: ${GID:-1000} # Pass host GID + stdin_open: true + tty: true + volumes: + # Mount the point_pillars_node.py file and model weights + - "./point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" + - "./epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" + # Mount host home directory (optional) + - "~:/home/ppuser/host:ro" + - "/tmp/.X11-unix:/tmp/.X11-unix:rw" + environment: + - DISPLAY=${DISPLAY} + # ROS master settings to connect to external roscore + - ROS_MASTER_URI=http://host.docker.internal:11311 + - ROS_HOSTNAME=point_pillars_container + - ROS_IP=127.0.0.1 + - NVIDIA_DRIVER_CAPABILITIES=all + - NVIDIA_VISIBLE_DEVICES=all + network_mode: "host" # Use host network for direct ROS communication + ipc: host + user: "ppuser:ppuser" # Fixed username to match Dockerfile + restart: unless-stopped # Auto-restart if the container stops + # GPU configuration + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/GEMstack/onboard/perception/setup/get_nvidia_container.sh b/GEMstack/onboard/perception/setup/get_nvidia_container.sh new file mode 100755 index 000000000..4f738e2c8 --- /dev/null +++ b/GEMstack/onboard/perception/setup/get_nvidia_container.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit + +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker \ No newline at end of file From 
f03ca216be26bc74ea3be4f07c90e1fbe0071b5d Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 21:48:49 -0500 Subject: [PATCH 14/55] Initial point pillars and yolo nodes --- .../onboard/perception/point_pillars_node.py | 249 +++++++++ GEMstack/onboard/perception/yolo_node.py | 510 ++++++++++++++++++ 2 files changed, 759 insertions(+) create mode 100644 GEMstack/onboard/perception/point_pillars_node.py create mode 100644 GEMstack/onboard/perception/yolo_node.py diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py new file mode 100644 index 000000000..479b0efce --- /dev/null +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -0,0 +1,249 @@ +# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +# from ..interface.gem import GEMInterface +# from ..component import Component +# from perception_utils import * +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os + +# PointPillars imports: +import torch +from pointpillars.model import PointPillars + +# Publisher imports: +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Pose, Vector3 + + +import numpy as np +import ros_numpy +def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel(), + np.array(pc['intensity']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + +class PointPillarsNode(): + """ + Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud + data by painting the points. Pedestrians are also detected with PointPillars + on the point cloud. The resulting 3D bounding boxes of each are fused together + with late sensor fusion. + + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. 
+ """ + + def __init__( + self, + ): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.camera_name = 'front' + self.camera_front = (self.camera_name=='front') + self.score_threshold = 0.4 + self.initialize() + + def initialize(self): + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Initialize PointPillars node + rospy.init_node('pointpillars_box_publisher') + # Create bounding box publisher + self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=10) + rospy.loginfo("PointPillars node initialized and waiting for messages.") + + # Initialize PointPillars + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.pointpillars = PointPillars( + nclasses=3, + voxel_size=[0.16, 0.16, 4], + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + max_num_points=32, + max_voxels=(16000, 40000) + ) + self.pointpillars.to(device) + + model_path = 'epoch_160.pth' + checkpoint = torch.load(model_path) #, map_location='cuda' if torch.cuda.is_available() else 'cpu') + self.pointpillars.load_state_dict(checkpoint) + + # if torch.cuda.is_available(): + # self.pointpillars = self.pointpillars.cuda() + + self.pointpillars.eval() + rospy.loginfo("PointPillars model loaded successfully") + + if self.camera_front: + self.K = np.array([[684.83331299, 0., 573.37109375], + [0., 684.60968018, 363.70092773], + [0., 0., 1.]]) + else: + self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], + [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], + [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + + if self.camera_front: + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) + else: + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, + -6.19939758e-02]) + + self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.]]) + if self.camera_front: + self.T_l2c = np.array([ + [0.001090, -0.999489, -0.031941, 0.149698], + [-0.007664, 0.031932, -0.999461, -0.397813], + [0.999970, 0.001334, -0.007625, -0.691405], + [0., 0., 0., 1.000000] + ]) + else: + self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], + [-0.09720448, 0.13371206, -0.98624154, -0.1598301], + [0.68884317, -0.7061996, -0.16363744, -1.04767285], + [0., 0., 0., 1.]] + ) + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=500, slop=0.05) + self.sync.registerCallback(self.synchronized_callback) + + def synchronized_callback(self, image_msg, lidar_msg): + rospy.loginfo("Received synchronized RGB and LiDAR messages") + self.latest_lidar = pc2_to_numpy_with_intensity(lidar_msg, want_rgb=False) + + downsample = False + + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + boxes = BoundingBoxArray() + boxes.header.frame_id = 'velodyne' + 
boxes.header.stamp = lidar_msg.header.stamp + + pointpillars_detections = [] + with torch.no_grad(): + # Convert to tensor and format for PointPillars + lidar_tensor = torch.from_numpy(lidar_down).float() + if torch.cuda.is_available(): + lidar_tensor = lidar_tensor.cuda() + + # Format as batch with a single point cloud + batched_pts = [lidar_tensor] + + # Get PointPillars predictions + results = self.pointpillars(batched_pts, mode='test') + + if results and len(results) > 0 and 'lidar_bboxes' in results[0]: + # Process PointPillars results + pp_bboxes = results[0]['lidar_bboxes'] + pp_labels = results[0]['labels'] + pp_scores = results[0]['scores'] + + for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): + # Skip low confidence detections and non-pedestrians (assuming class is 0) + if (score < self.score_threshold) or (label != 0): + continue + + # Extract center position and dimensions + x, y, z, l, w, h, yaw = bbox + + # Transform from LiDAR to vehicle coordinates + center_lidar = np.array([x, y, z, 1.0]) + center_vehicle = self.T_l2v @ center_lidar + x_vehicle, y_vehicle, z_vehicle = center_vehicle[:3] + + # Transform rotation from LiDAR to vehicle frame + R_lidar = R.from_euler('z', yaw).as_matrix() + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + + # Create a ROS BoundingBox message + box = BoundingBox() + box.header.frame_id = 'velodyne' + box.header.stamp = lidar_msg.header.stamp + + # Set the pose (position and orientation) + box.pose.position.x = float(x_vehicle) + box.pose.position.y = float(y_vehicle) + box.pose.position.z = float(z_vehicle) + + # Convert yaw to quaternion + quat = R.from_euler('z', yaw).as_quat() + box.pose.orientation.x = float(quat[0]) + box.pose.orientation.y = float(quat[1]) + box.pose.orientation.z = float(quat[2]) + box.pose.orientation.w = float(quat[3]) + + # Set the dimensions + box.dimensions.x = float(l) # length + box.dimensions.y = float(w) # width + box.dimensions.z = float(h) # height + + # Add class label and confidence score: + box.value = float(score) + box.label = int(label) + + boxes.boxes.append(box) + + # Also store detection info in the list if needed for other processing + pointpillars_detections.append({ + 'pose': { + 'position': [x, y, z], + 'orientation': [quat[0], quat[1], quat[2], quat[3]] + }, + 'dimensions': [l, w, h], + 'score': float(score) + }) + + rospy.loginfo(f"Pedestrian detected at ({x:.2f}, {y:.2f}, {z:.2f}) with score {score:.2f}") + + # Save point_pillars_detections here for later if needed + + # Publish the bounding boxes + rospy.loginfo(f"Publishing {len(boxes.boxes)} pedestrian bounding boxes") + self.pub.publish(boxes) + +if __name__ == '__main__': + try: + node = PointPillarsNode() + rospy.spin() + except rospy.ROSInterruptException: + pass \ No newline at end of file diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py new file mode 100644 index 000000000..1fb1df463 --- /dev/null +++ b/GEMstack/onboard/perception/yolo_node.py @@ -0,0 +1,510 @@ +# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +# from ..interface.gem import GEMInterface +# from ..component import Component +# from perception_utils import * +from ultralytics import YOLO +import cv2 +from typing import Dict +import open3d as o3d +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from 
sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os +import yaml +import ros_numpy + + +from sklearn.cluster import DBSCAN +import sensor_msgs.point_cloud2 as pc2 + +# Publisher imports: +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Pose, Vector3 + +def cylindrical_roi(points, center, radius, height): + horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) + vertical_diff = np.abs(points[:, 2] - center[2]) + mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) + return points[mask] + + +def filter_points_within_threshold(points, threshold=15.0): + distances = np.linalg.norm(points, axis=1) + mask = distances <= threshold + return points[mask] + + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Stack x,y,z fields to a (N,3) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z in a specified range) + mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + return pts[mask] + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. + """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). 
+ """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. + """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + + +def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): + if lidar_points.shape[0] == 0: + return lidar_points + + if use_norm: + depths = np.linalg.norm(lidar_points, axis=1) + else: + depths = lidar_points[:, 0] + + min_depth = np.min(depths) + max_possible_depth = min_depth + max_depth_diff + mask = depths < max_possible_depth + return lidar_points[mask] + + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. + """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = pose.yaw + pitch = pose.pitch + roll = pose.roll + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T + + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
+ """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj + + +class YoloNode(): + """ + Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud + data by painting the points. Pedestrians are also detected with PointPillars + on the point cloud. The resulting 3D bounding boxes of each are fused together + with late sensor fusion. + + Tracking is optional: set `enable_tracking=False` to disable persistent tracking + and return only detections from the current frame. + """ + + def __init__( + self, + ): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.camera_name = 'front' + self.camera_front = (self.camera_name=='front') + self.score_threshold = 0.4 + self.initialize() + + def initialize(self): + # --- Determine the correct RGB topic for this camera --- + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Initialize YOLO node + rospy.init_node('yolo_box_publisher') + # Create bounding box publisher + self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=10) + rospy.loginfo("YOLO node initialized and waiting for messages.") + + if self.camera_front: + self.K = np.array([[684.83331299, 0., 573.37109375], + [0., 684.60968018, 363.70092773], + [0., 0., 1.]]) + else: + self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], + [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], + [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + + if self.camera_front: + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) + else: + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, + -6.19939758e-02]) + + self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.]]) + if self.camera_front: + self.T_l2c = np.array([ + [0.001090, -0.999489, -0.031941, 0.149698], + [-0.007664, 0.031932, -0.999461, -0.397813], + [0.999970, 0.001334, -0.007625, -0.691405], + [0., 0., 0., 1.000000] + ]) + else: + self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], + [-0.09720448, 0.13371206, -0.98624154, -0.1598301], + [0.68884317, -0.7061996, -0.16363744, -1.04767285], + [0., 0., 0., 1.]] + ) + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + # Initialize the YOLO detector + self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') + self.detector.to('cuda') + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer([ + self.rgb_sub, self.lidar_sub + ], queue_size=500, slop=0.05) + self.sync.registerCallback(self.synchronized_callback) + + def synchronized_callback(self, image_msg, lidar_msg): + rospy.loginfo("Received synchronized RGB and LiDAR messages") + try: + self.latest_image = 
self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr("Failed to convert image: {}".format(e)) + self.latest_image = None + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + # Gate guards against data not being present for both sensors: + if self.latest_image is None or self.latest_lidar is None: + return {} + else: + print(type(self.latest_image)) + print(type(self.latest_lidar)) + lastest_image = self.latest_image.copy() + + downsample = False + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + # if self.start_time is None: + # self.start_time = current_time + # time_elapsed = current_time - self.start_time + + if self.camera_front == False: + start = time.time() + undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + end = time.time() + # print('-------processing time undistort_image---', end -start) + self.current_K = current_K + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + else: + img_normal = lastest_image.copy() + undistorted_img = lastest_image.copy() + orig_H, orig_W = lastest_image.shape[:2] + self.current_K = self.K + results_normal = self.detector(img_normal, conf=0.35, classes=[0]) + combined_boxes = [] + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + # for box in boxes_normal: + # cx, cy, w, h = box + # combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + start = time.time() + # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. + # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + # What is returned: + # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) + # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) + # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + + # Create empty list of bounding boxes to fill and publish later + boxes = BoundingBoxArray() + boxes.header.frame_id = 'velodyne' + boxes.header.stamp = lidar_msg.header.stamp + + # Process YOLO detections + yolo_detections = [] + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else [] + + for i, box in enumerate(boxes_normal): + # Skip low confidence detections + if conf_scores[i] < self.score_threshold: + continue + + # Calculate the 2D bounding box in the image + cx, cy, w, h = box + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + + # Find LiDAR points that project to this box + mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + roi_pts = projected_pts[mask] + + # Ignore regions with too few points + if roi_pts.shape[0] < 5: + continue + + # Get the 3D points corresponding to the box + points_3d = roi_pts[:, 2:5] + + # Filter points to get a cleaner cluster + points_3d = filter_points_within_threshold(points_3d, 40) + points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) + points_3d = 
filter_depth_points(points_3d, max_depth_diff=0.5) + + if points_3d.shape[0] < 4: + continue + + # Create a point cloud from the filtered points + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points_3d) + + # Get an oriented bounding box + obb = pcd.get_oriented_bounding_box() + center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + + # Transform from LiDAR to vehicle coordinates + center_hom = np.append(center, 1) + center_vehicle_hom = self.T_l2v @ center_hom + center_vehicle = center_vehicle_hom[:3] + + # Calculate rotation in vehicle frame + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + + # Create a ROS BoundingBox message + box_msg = BoundingBox() + box_msg.header.frame_id = 'velodyne' + box_msg.header.stamp = lidar_msg.header.stamp + + # Set the pose + box_msg.pose.position.x = float(center_vehicle[0]) + box_msg.pose.position.y = float(center_vehicle[1]) + box_msg.pose.position.z = float(center_vehicle[2]) + + # Convert yaw to quaternion + quat = R.from_euler('z', yaw).as_quat() + box_msg.pose.orientation.x = float(quat[0]) + box_msg.pose.orientation.y = float(quat[1]) + box_msg.pose.orientation.z = float(quat[2]) + box_msg.pose.orientation.w = float(quat[3]) + + # Set the dimensions + box_msg.dimensions.x = float(dims[0]) # length + box_msg.dimensions.y = float(dims[1]) # width + box_msg.dimensions.z = float(dims[2]) # height + + # Add confidence score and label + box_msg.value = float(conf_scores[i]) + box_msg.label = 0 # person/pedestrian class + + boxes.boxes.append(box_msg) + + # Store detection info + yolo_detections.append({ + 'pose': { + 'position': center_vehicle, + 'orientation': [quat[0], quat[1], quat[2], quat[3]] + }, + 'dimensions': dims, + 'score': float(conf_scores[i]) + }) + + rospy.loginfo(f"Person detected at ({center_vehicle[0]:.2f}, {center_vehicle[1]:.2f}, {center_vehicle[2]:.2f}) with score {conf_scores[i]:.2f}") + + # Publish the bounding boxes + rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes") + self.pub.publish(boxes) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None or self.undistort_map2 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, + newCameraMatrix=newK, size=(w, h), + m1type=cv2.CV_32FC1) + + start = time.time() + undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) + end = time.time() + # print('--------undistort', end-start) + return undistorted, newK + + +if __name__ == '__main__': + try: + node = YoloNode() + rospy.spin() + except rospy.ROSInterruptException: + pass \ No newline at end of file From 0c51d415d32c538c91ac49a81c66b46adbe5e594 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 21:55:01 -0500 Subject: [PATCH 15/55] Docker container now subscribes to and publishes to nodes outside of the container --- .../perception/setup/Dockerfile.cuda111 | 30 +++++++++++++------ .../perception/setup/docker-compose.yaml | 10 ++++--- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 index 6b901b289..d212781ec 100644 --- a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 +++ b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 @@ -66,8 +66,15 @@ RUN pip3 install 
numba==0.48.0 numpy==1.19.5 PyYAML==6.0 setuptools==58.0.4 tens RUN pip3 install opencv-python==4.5.5.62 # Install Open3D directly using a specific wheel for Python 3.8 -RUN pip3 install --no-cache-dir --pre https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl || \ - pip3 install --no-cache-dir open3d==0.13.0 +# RUN pip3 install --no-cache-dir --pre https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl || \ +# pip3 install --no-cache-dir open3d==0.13.0 || \ +# pip3 install --no-cache-dir open3d==0.12.0 +# RUN pip3 install --no-cache-dir \ +# https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl +RUN pip3 install --upgrade pip setuptools wheel +RUN pip3 install open3d==0.14.1 pandas==1.1.5 + + # Build and install PointPillars RUN python3 setup.py build_ext --inplace @@ -76,11 +83,16 @@ RUN pip3 install . # Add ROS paths to bashrc RUN echo "source /opt/ros/noetic/setup.bash" >> /home/${USER}/.bashrc -# Create startup script -RUN echo '#!/bin/bash' > /home/${USER}/start_node.sh \ - && echo 'cd /home/${USER}/pointpillars_ws' >> /home/${USER}/start_node.sh \ - && echo 'python3 point_pillars_node.py' >> /home/${USER}/start_node.sh \ - && chmod +x /home/${USER}/start_node.sh +RUN apt-get update && apt-get install -y ros-noetic-ros-numpy + +# Create startup script with hardcoded path to avoid variable substitution issues +RUN echo '#!/bin/bash' > /home/ppuser/start_node.sh \ + && echo 'source /opt/ros/noetic/setup.bash' >> /home/ppuser/start_node.sh \ +# && echo 'source ~/catkin_ws/devel/setup.bash' >> /home/ppuser/start_node.sh \ + && echo 'cd /home/ppuser/pointpillars_ws' >> /home/ppuser/start_node.sh \ + && echo 'echo "Starting PointPillars node..."' >> /home/ppuser/start_node.sh \ + && echo 'exec python3 point_pillars_node.py' >> /home/ppuser/start_node.sh \ + && chmod +x /home/ppuser/start_node.sh # Set ownership of the workspace RUN chown -R ${USER}:${USER} /home/${USER} @@ -88,5 +100,5 @@ RUN chown -R ${USER}:${USER} /home/${USER} USER ${USER} WORKDIR /home/${USER}/pointpillars_ws -# Command to run when the container starts -CMD ["/bin/bash", "-c", "/home/${USER}/start_node.sh"] +# Command to run when the container starts - use hardcoded path +CMD ["/bin/bash", "-c", "/home/ppuser/start_node.sh"] diff --git a/GEMstack/onboard/perception/setup/docker-compose.yaml b/GEMstack/onboard/perception/setup/docker-compose.yaml index 0e971dfa6..50d95d193 100644 --- a/GEMstack/onboard/perception/setup/docker-compose.yaml +++ b/GEMstack/onboard/perception/setup/docker-compose.yaml @@ -15,8 +15,8 @@ services: tty: true volumes: # Mount the point_pillars_node.py file and model weights - - "./point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" - - "./epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" + - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" + - "../epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" # Mount host home directory (optional) - "~:/home/ppuser/host:ro" - "/tmp/.X11-unix:/tmp/.X11-unix:rw" @@ -24,11 +24,13 @@ services: - DISPLAY=${DISPLAY} # ROS master settings to connect to external roscore - ROS_MASTER_URI=http://host.docker.internal:11311 - - ROS_HOSTNAME=point_pillars_container - - ROS_IP=127.0.0.1 + - ROS_HOSTNAME=localhost + - ROS_IP= - NVIDIA_DRIVER_CAPABILITIES=all - NVIDIA_VISIBLE_DEVICES=all network_mode: "host" # Use host network for 
direct ROS communication + extra_hosts: + - "host.docker.internal:host-gateway" # Added this line to resolve host.docker.internal ipc: host user: "ppuser:ppuser" # Fixed username to match Dockerfile restart: unless-stopped # Auto-restart if the container stops From baf9bb524135869f34bb645c0f9ed7854c89cbfd Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 3 May 2025 22:05:25 -0500 Subject: [PATCH 16/55] Updated some documentation. Put notes in for future place for someone to work off of --- .../onboard/perception/combined_detection.py | 32 +--- .../onboard/perception/lidar_detection.py | 172 ------------------ .../onboard/perception/point_pillars_node.py | 8 +- GEMstack/onboard/perception/yolo_node.py | 7 +- 4 files changed, 9 insertions(+), 210 deletions(-) delete mode 100644 GEMstack/onboard/perception/lidar_detection.py diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 59eb369f5..8f041e6c0 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -16,36 +16,14 @@ import os import yaml -# PointPillars imports: -import torch - -# Import compatibility layer first -try: - import numpy_compat # This will patch numpy before other imports -except ImportError: - # Create inline patch if file doesn't exist - import numpy as np - if not hasattr(np, 'long'): - np.long = np.int64 - -# Now try importing PointPillars -try: - from pointpillars.model import PointPillars -except ImportError as e: - # Provide a fallback - print(f"Warning: Failed to import PointPillars: {e}") - # Define a dummy PointPillars class to prevent further errors - class PointPillars: - def __init__(self, *args, **kwargs): - raise NotImplementedError("PointPillars failed to import correctly") - class CombinedDetector3D(Component): """ - Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud - data by painting the points. Pedestrians are also detected with PointPillars - on the point cloud. The resulting 3D bounding boxes of each are fused together - with late sensor fusion. + Fuses the boxes in the lists of bounding boxes published by YoloNode and + PointPillarsNode with late sensor fusion. + TODO: SUBSCRIBE TO BOUNDING BOX LISTS AND PERFORM LATE SENSOR FUSION IN THIS FILE. + TODO: MODIFY YAML FILE FOR THE CONTROL TEAM'S BASIC PATH PLANNING CODE + TODO: REMOVE SOME CAMERA CALIBRATION MATRICES FROM FILE. USED IN THE NODES. Tracking is optional: set `enable_tracking=False` to disable persistent tracking and return only detections from the current frame. 
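The late-fusion step described in the CombinedDetector3D docstring above needs a way to decide when a YOLO box and a PointPillars box refer to the same pedestrian. One common association score is 3D intersection-over-union; the sketch below is a minimal, axis-aligned approximation (yaw is ignored) computed from the center and dimension fields carried by the jsk_recognition_msgs BoundingBox messages. It is an illustrative helper under those assumptions, not code that exists in this patch series.

```python
import numpy as np

def axis_aligned_iou_3d(center_a, dims_a, center_b, dims_b):
    """Approximate 3D IoU of two boxes, ignoring their yaw.

    center_*: (x, y, z) box centers expressed in the same frame.
    dims_*:   (length, width, height) box extents.
    """
    a_min = np.asarray(center_a) - np.asarray(dims_a) / 2.0
    a_max = np.asarray(center_a) + np.asarray(dims_a) / 2.0
    b_min = np.asarray(center_b) - np.asarray(dims_b) / 2.0
    b_max = np.asarray(center_b) + np.asarray(dims_b) / 2.0

    # Per-axis overlap, clamped at zero when the boxes do not intersect
    overlap = np.clip(np.minimum(a_max, b_max) - np.maximum(a_min, b_min), 0.0, None)
    intersection = overlap.prod()
    union = np.asarray(dims_a).prod() + np.asarray(dims_b).prod() - intersection
    return float(intersection / union) if union > 0.0 else 0.0
```

Pairs whose IoU (or, more cheaply, center distance) exceeds a chosen threshold can then be merged, with any remaining unmatched boxes kept as single-sensor detections.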
diff --git a/GEMstack/onboard/perception/lidar_detection.py b/GEMstack/onboard/perception/lidar_detection.py deleted file mode 100644 index 17eb6ba76..000000000 --- a/GEMstack/onboard/perception/lidar_detection.py +++ /dev/null @@ -1,172 +0,0 @@ -# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum -# from ..interface.gem import GEMInterface -# from ..component import Component -# from perception_utils import * -import numpy as np -from scipy.spatial.transform import Rotation as R -import rospy -from sensor_msgs.msg import PointCloud2, Image -from message_filters import Subscriber, ApproximateTimeSynchronizer -from cv_bridge import CvBridge -import time -import os - -# PointPillars imports: -import torch -from pointpillars.model import PointPillars - -# Publisher imports: -from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray -from geometry_msgs.msg import Pose, Vector3 - - -import numpy as np -import ros_numpy -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. - """ - print(pc2_msg.fields) - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Stack x,y,z fields to a (N,3) array - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel(), - np.array(pc['intensity']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z in a specified range) - mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) - return pts[mask] - -class PointPillarsNode(): - """ - Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud - data by painting the points. Pedestrians are also detected with PointPillars - on the point cloud. The resulting 3D bounding boxes of each are fused together - with late sensor fusion. - - Tracking is optional: set `enable_tracking=False` to disable persistent tracking - and return only detections from the current frame. 
- """ - - def __init__( - self, - ): - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() - self.camera_name = 'front' - self.camera_front = (self.camera_name=='front') - self.initialize() - - def initialize(self): - # --- Determine the correct RGB topic for this camera --- - rgb_topic_map = { - 'front': '/oak/rgb/image_raw', - 'front_right': '/camera_fr/arena_camera_node/image_raw', - # add additional camera mappings here if needed - } - rgb_topic = rgb_topic_map.get( - self.camera_name, - f'/{self.camera_name}/rgb/image_raw' - ) - - # Initialize PointPillars node - rospy.init_node('pointpillars_box_publisher') - - # Subscribe to the RGB and LiDAR streams - self.rgb_sub = Subscriber(rgb_topic, Image) - self.lidar_sub = Subscriber('/ouster/points', PointCloud2) - self.sync = ApproximateTimeSynchronizer([ - self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) - self.sync.registerCallback(self.synchronized_callback) - - # Initialize PointPillars - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - self.pointpillars = PointPillars( - nclasses=3, - voxel_size=[0.16, 0.16, 4], - point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], - max_num_points=32, - max_voxels=(16000, 40000) - ) - self.pointpillars.to(device) - - model_path = 'epoch_160.pth' - checkpoint = torch.load(model_path) #, map_location='cuda' if torch.cuda.is_available() else 'cpu') - self.pointpillars.load_state_dict(checkpoint) - - # if torch.cuda.is_available(): - # self.pointpillars = self.pointpillars.cuda() - - self.pointpillars.eval() - rospy.loginfo("PointPillars model loaded successfully") - - # Create bounding box publisher - self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=10) - - rospy.loginfo("PointPillars node initialized and waiting for messages.") - - def synchronized_callback(self, image_msg, lidar_msg): - rospy.loginfo("Received synchronized RGB and LiDAR messages") - self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) - - downsample = False - - if downsample: - lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) - else: - lidar_down = self.latest_lidar.copy() - - boxes = BoundingBoxArray() - boxes.header.frame_id = 'velodyne' - boxes.header.stamp = lidar_msg.header.stamp - - pointpillars_detections = [] - with torch.no_grad(): - # Convert to tensor and format for PointPillars - lidar_tensor = torch.from_numpy(lidar_down).float() - if torch.cuda.is_available(): - lidar_tensor = lidar_tensor.cuda() - - # Format as batch with a single point cloud - batched_pts = [lidar_tensor] - - # Get PointPillars predictions - results = self.pointpillars(batched_pts, mode='test') - - if results and len(results) > 0 and 'lidar_bboxes' in results[0]: - # Process PointPillars results - pp_bboxes = results[0]['lidar_bboxes'] - pp_labels = results[0]['labels'] - pp_scores = results[0]['scores'] - - for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): - # Skip low confidence detections - if score < 0.5: # Adjust threshold as needed - continue - - rospy.loginfo(f"PointPillars detected {bbox} objects") - - # Extract center position and dimensions - x, y, z, l, w, h, yaw = bbox - center = np.array([x, y, z]) - dims = (l, w, h) - - pointpillars_detections.append({ - 'pose': pp_pose, - 'dimensions': dims, - 'activity': activity, - 'score': score - }) - - rospy.loginfo(f"PointPillars detected {len(pointpillars_detections)} objects") - -if __name__ == '__main__': - try: - node = PointPillarsNode() - 
rospy.spin() - except rospy.ROSInterruptException: - pass \ No newline at end of file diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 479b0efce..954a1e5bd 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -40,13 +40,7 @@ def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): class PointPillarsNode(): """ - Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud - data by painting the points. Pedestrians are also detected with PointPillars - on the point cloud. The resulting 3D bounding boxes of each are fused together - with late sensor fusion. - - Tracking is optional: set `enable_tracking=False` to disable persistent tracking - and return only detections from the current frame. + Detects Pedestrians with PointPillars and publishes the results in vehicle frame. """ def __init__( diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 1fb1df463..747c71108 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -237,9 +237,8 @@ def project_points(pts_cam, K, original_lidar_points): class YoloNode(): """ Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud - data by painting the points. Pedestrians are also detected with PointPillars - on the point cloud. The resulting 3D bounding boxes of each are fused together - with late sensor fusion. + data by painting the points. The painted data is converted to vehicle + frame and then published as a list of bounding boxes. Tracking is optional: set `enable_tracking=False` to disable persistent tracking and return only detections from the current frame. @@ -311,7 +310,7 @@ def initialize(self): self.camera_origin_in_lidar = self.T_c2l[:3, 3] # Initialize the YOLO detector - self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') + self.detector = YOLO('yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') self.detector.to('cuda') # Subscribe to the RGB and LiDAR streams From 7ace7ee6b2ce10c0f578b81eac6b73f97644366b Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sun, 4 May 2025 08:49:55 -0500 Subject: [PATCH 17/55] Was not required to have Nvidia Toolkit on system so removing Team B infrastructure's get_nvidia_container.sh file. POINTPILLARS ONLY NEEDS AN NVIDIA DRIVER (such as nvidia-driver-535) TO RUN --- GEMstack/onboard/perception/build_point_pillars.sh | 14 +------------- .../onboard/perception/setup/Dockerfile.cuda111 | 9 +-------- .../perception/setup/get_nvidia_container.sh | 14 -------------- 3 files changed, 2 insertions(+), 35 deletions(-) delete mode 100755 GEMstack/onboard/perception/setup/get_nvidia_container.sh diff --git a/GEMstack/onboard/perception/build_point_pillars.sh b/GEMstack/onboard/perception/build_point_pillars.sh index 8b50ac246..c958ea836 100644 --- a/GEMstack/onboard/perception/build_point_pillars.sh +++ b/GEMstack/onboard/perception/build_point_pillars.sh @@ -1,18 +1,5 @@ #!/bin/bash -# Check if nvidia-docker is installed -if ! command -v nvidia-container-toolkit &> /dev/null; then - echo "NVIDIA Container Toolkit not found. Do you want to install it? (y/n)" - read -p ">" choice - if [ "$choice" = "y" ] || [ "$choice" = "Y" ]; then - echo "Installing NVIDIA Container Toolkit..." - ./setup/get_nvidia_container.sh - else - echo "NVIDIA Container Toolkit is required for GPU support." 
- echo "You can install it later by running ./get_nvidia_container.sh" - fi -fi - # Check if the point_pillars_node.py and model weights exist if [ ! -f "point_pillars_node.py" ]; then echo "ERROR: point_pillars_node.py not found in the current directory!" @@ -44,6 +31,7 @@ if ! docker compose -f setup/docker-compose.yaml build; then # sudo -E docker compose -f setup/docker-compose.yaml build fi +# Notify user of how to run the container echo "Build complete. To start the container, run:" echo "docker compose -f setup/docker-compose.yaml up" echo "" diff --git a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 index d212781ec..680c21c45 100644 --- a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 +++ b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 @@ -65,17 +65,10 @@ RUN pip3 install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 \ RUN pip3 install numba==0.48.0 numpy==1.19.5 PyYAML==6.0 setuptools==58.0.4 tensorboard tqdm==4.62.3 RUN pip3 install opencv-python==4.5.5.62 -# Install Open3D directly using a specific wheel for Python 3.8 -# RUN pip3 install --no-cache-dir --pre https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl || \ -# pip3 install --no-cache-dir open3d==0.13.0 || \ -# pip3 install --no-cache-dir open3d==0.12.0 -# RUN pip3 install --no-cache-dir \ -# https://storage.googleapis.com/open3d-releases-master/python-wheels/open3d-0.14.1-cp38-cp38-linux_x86_64.whl +# Upgrade pip, setuptools, and wheel to try to install the required versions of Open3D and pandas RUN pip3 install --upgrade pip setuptools wheel RUN pip3 install open3d==0.14.1 pandas==1.1.5 - - # Build and install PointPillars RUN python3 setup.py build_ext --inplace RUN pip3 install . 
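As a sanity check for the claim in the commit message above that only an NVIDIA driver is needed on the host, a snippet along the following lines can be run inside the running container (named point_pillars_container in the docker-compose file shown earlier) to confirm that PyTorch actually sees the GPU. This is only a suggested check, not part of the patch; the file name and invocation are up to the user.

```python
# Minimal sanity check; intended to be run with python3 inside the running
# point_pillars_container (how you copy or mount this script is up to you).
import torch

print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    print("CUDA runtime used by PyTorch:", torch.version.cuda)
```

The node itself selects its device with torch.cuda.is_available(), so this check mirrors what point_pillars_node.py will do at startup.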
diff --git a/GEMstack/onboard/perception/setup/get_nvidia_container.sh b/GEMstack/onboard/perception/setup/get_nvidia_container.sh
deleted file mode 100755
index 4f738e2c8..000000000
--- a/GEMstack/onboard/perception/setup/get_nvidia_container.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
-  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
-    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
-    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-
-sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
-
-sudo apt-get update
-sudo apt-get install -y nvidia-container-toolkit
-
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
\ No newline at end of file

From 2ca4120218c5d77c19570d79fcf85e683957fa4e Mon Sep 17 00:00:00 2001
From: LucasEby
Date: Sun, 4 May 2025 08:50:47 -0500
Subject: [PATCH 18/55] Created a readme file for the perception team and
 wrote initial sensor fusion documentation and set up instructions

---
 GEMstack/onboard/perception/README.md | 70 +++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 GEMstack/onboard/perception/README.md

diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md
new file mode 100644
index 000000000..59095867c
--- /dev/null
+++ b/GEMstack/onboard/perception/README.md
@@ -0,0 +1,70 @@
+# Perception Team
+This folder contains code that is used to detect objects in 3D space and subsequently notify other GEMstack components of the detected objects. It is split up into 3 main areas: Pedestrian detection code created at the beginning-middle of the course, Cone Detection code that is used to detect cones (optionally with orientation), and Sensor Fusion code which fuses YOLO + painted lidar data and PointPillars 3D bounding boxes to detect pedestrians.
+
+## Cone Detection
+Description goes here.
+
+### Relevant Files
+- cone_detection.py
+- perception_utils.py
+
+
+## Sensor Fusion
+To improve the quality of the detected pedestrians, we decided to fuse detections from multiple modalities to take advantage of the strengths each sensor (camera and lidar in our case) provides. We accomplished this by fusing the 3D bounding boxes produced by YOLO (a camera-based detector whose 2D detections are combined with painted lidar data) with those produced by PointPillars (a detector that uses only lidar data).
+
+### Relevant Files
+#### Setup files to create PointPillars CUDA 11.1.1 Docker container:
+- build_point_pillars.sh
+- setup/docker-compose.yaml
+- setup/Dockerfile.cuda111
+
+#### Code used to detect pedestrians:
+- combined_detection.py
+- point_pillars_node.py
+- yolo_node.py
+
+#### Code used to analyze the results of detections and extract data from rosbags for further analysis:
+- eval_3d_bbox_performance.py
+- rosbag_processor.py
+- test_eval_3d_bbox_performance.py
+
+### Local Installation Steps for PointPillars Docker Container
+#### READ BEFOREHAND:
+- Before performing installation steps, please make sure you source ALL terminal windows (except for the docker terminal window).
+$ source /opt/ros/noetic/setup.bash +$ source ~/catkin_ws/devel/setup.bash +- These instructions were written with the assumption that you are running them inside of the outermost GEMstack folder. +- If you have set up issues please read the "Set Up Issues Known Fixes" section at the bottom. + +#### Steps: +1. Install Docker +2. Install Docker Compose +3. A bash script was created to handle docker permissions issues and make the set up process simpler: +$ bash GEMstack/onboard/perception/build_point_pillars.sh +4. Start the container (use sudo if you run into permissions issues) +$ docker compose -f GEMstack/onboard/perception/setup/docker-compose.yaml up +5. Run roscore on local machine (make sure you source first) +$ roscore +6. Start up YOLO node (make sure you source first): +$ python3 GEMstack/onboard/perception/yolo_node.py +7. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first): +$ python3 main.py --variant=detector_only launch/combined_detection.yaml +8. Run a rosbag on a loop (make sure you source first): +$ rosbag play -l yourRosbagNameGoesHere.bag + +### Vehicle Installation Steps for PointPillars Docker Container +Perform the same setup steps as the above section with the below exceptions: +1. Ensure you source instead with the following command: +$ source ~/demo_ws/devel/setup.bash +2. Initialize the sensors: +$ roslaunch basic_launch sensor_init.launch +3. Initialize GNSS (if you need it) +$ roslaunch basic_launch visualization.launch +4. Do not run a rosbag in Step 8 above (it's not needed since you'll be getting live data from the vehicle) + +#### Known Fixes for Set Up Issues +1. If you get a shape error when creating the "results_normal" variable in yolo_node.py, please downgrade your Ultralytics version to 8.1.5 (this is the version used on the car at the time of writing this): +$ pip install 'ultralytics==8.1.5' +2. If you run into communication issues with ROS, please make sure you have sourced EVERY terminal window (except for docker window there's no need to): +$ source /opt/ros/noetic/setup.bash +$ source ~/catkin_ws/devel/setup.bash \ No newline at end of file From 3241c742d5543973ff5ae936fa21e283537e009c Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sun, 4 May 2025 10:47:32 -0500 Subject: [PATCH 19/55] Cleaned up CombinedDetector3D and it's yaml file and documented plan for next steps --- .../onboard/perception/combined_detection.py | 572 ++---------------- launch/combined_detection.yaml | 9 +- 2 files changed, 43 insertions(+), 538 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 8f041e6c0..958d5b274 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -2,28 +2,22 @@ from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * -from ultralytics import YOLO -import cv2 from typing import Dict -import open3d as o3d -import numpy as np -from scipy.spatial.transform import Rotation as R import rospy -from sensor_msgs.msg import PointCloud2, Image from message_filters import Subscriber, ApproximateTimeSynchronizer -from cv_bridge import CvBridge import time import os import yaml +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray + class CombinedDetector3D(Component): """ Fuses the boxes in the lists of bounding boxes published by YoloNode and PointPillarsNode with late sensor fusion. 
TODO: SUBSCRIBE TO BOUNDING BOX LISTS AND PERFORM LATE SENSOR FUSION IN THIS FILE. - TODO: MODIFY YAML FILE FOR THE CONTROL TEAM'S BASIC PATH PLANNING CODE - TODO: REMOVE SOME CAMERA CALIBRATION MATRICES FROM FILE. USED IN THE NODES. + TODO: MODIFY YAML FILE FOR THE CONTROL TEAM'S BASIC PATH PLANNING CODE? Tracking is optional: set `enable_tracking=False` to disable persistent tracking and return only detections from the current frame. @@ -35,14 +29,7 @@ class CombinedDetector3D(Component): def __init__( self, vehicle_interface: GEMInterface, - camera_name: str, - camera_calib_file: str, enable_tracking: bool = True, - visualize_2d: bool = False, - use_cyl_roi: bool = False, - T_l2v: list = None, - save_data: bool = True, - orientation: bool = True, use_start_frame: bool = True, **kwargs ): @@ -50,54 +37,16 @@ def __init__( self.vehicle_interface = vehicle_interface self.current_agents = {} self.tracked_agents = {} - self.ped_counter = 0 - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() + self.ped_counter = 0 + self.latest_yolo_bbxs = None # Stores the latest list of YOLO bounding boxes + self.latest_pp_bbxs = None # Stores the latest list of PointPillars bounding boxes self.start_pose_abs = None self.start_time = None # Config flags - self.camera_name = camera_name self.enable_tracking = enable_tracking - self.visualize_2d = visualize_2d - self.use_cyl_roi = use_cyl_roi - self.save_data = save_data - self.orientation = orientation self.use_start_frame = use_start_frame - # 1) Load lidar→vehicle transform - if T_l2v is not None: - self.T_l2v = np.array(T_l2v) - else: - self.T_l2v = np.array([ - [0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0.0, 0.0, 0.0, 1.0] - ]) - - # 2) Load camera intrinsics/extrinsics from the supplied YAML - with open(camera_calib_file, 'r') as f: - calib = yaml.safe_load(f) - - # Expect structure: - # cameras: - # front: - # K: [[...], [...], [...]] - # D: [...] 
- # T_l2c: [[...], ..., [...]] - cam_cfg = calib['cameras'][camera_name] - self.K = np.array(cam_cfg['K']) - self.D = np.array(cam_cfg['D']) - self.T_l2c = np.array(cam_cfg['T_l2c']) - - # Derived transforms - - self.undistort_map1 = None - self.undistort_map2 = None - self.camera_front = (camera_name=='front') - def rate(self) -> float: return 8 @@ -108,501 +57,64 @@ def state_outputs(self) -> list: return ['agents'] def initialize(self): - # --- Determine the correct RGB topic for this camera --- - rgb_topic_map = { - 'front': '/oak/rgb/image_raw', - 'front_right': '/camera_fr/arena_camera_node/image_raw', - # add additional camera mappings here if needed - } - rgb_topic = rgb_topic_map.get( - self.camera_name, - f'/{self.camera_name}/rgb/image_raw' - ) - - # Subscribe to the RGB and LiDAR streams - self.rgb_sub = Subscriber(rgb_topic, Image) - self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + # Subscribe to the BoundingBox + self.yolo_sub = Subscriber('/yolo_boxes', BoundingBoxArray) + self.pp_sub = Subscriber('/pointpillars_boxes', BoundingBoxArray) self.sync = ApproximateTimeSynchronizer([ - self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) + self.yolo_sub, self.pp_sub + ], queue_size=50, slop=0.05) # GREATLY DECREASED QUEUE SIZE, 50 might even be too much self.sync.registerCallback(self.synchronized_callback) - # Initialize the YOLO detector - self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') - self.detector.to('cuda') - - # Initialize PointPillars - self.pointpillars = PointPillars( - nclasses=3, - voxel_size=[0.16, 0.16, 4], - point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], - max_num_points=32, - max_voxels=(16000, 40000) - ) - - model_path = 'GEMstack/knowledge/detection/epoch_160.pth' - if torch.cuda.is_available(): - checkpoint = torch.load(model_path) - self.pointpillars.load_state_dict(checkpoint['model_state_dict']) - self.pointpillars.to('cuda') - else: - checkpoint = torch.load(model_path, map_location=torch.device('cpu')) - self.pointpillars.load_state_dict(checkpoint['model_state_dict']) - self.pointpillars.eval() # Set to evaluation mode - rospy.loginfo("PointPillars model loaded successfully") - - # Initialize the calibration matrices: - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] - self.camera_origin_in_lidar = self.T_c2l[:3, 3] - - def synchronized_callback(self, image_msg, lidar_msg): - step1 = time.time() - try: - self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") - except Exception as e: - rospy.logerr("Failed to convert image: {}".format(e)) - self.latest_image = None - step2 = time.time() - self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) - step3 = time.time() - # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) - - def undistort_image(self, image, K, D): - h, w = image.shape[:2] - newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) - if self.undistort_map1 is None or self.undistort_map2 is None: - self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, - newCameraMatrix=newK, size=(w, h), - m1type=cv2.CV_32FC1) - - start = time.time() - undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) - end = time.time() - # print('--------undistort', end-start) - return undistorted, newK + def synchronized_callback(self, yolo_bbxs_msg, pp_bbxs_msg): + self.latest_yolo_bbxs = yolo_bbxs_msg + self.latest_pp_bbxs = 
pp_bbxs_msg def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: - downsample = False # Gate guards against data not being present for both sensors: - if self.latest_image is None or self.latest_lidar is None: + if self.latest_yolo_bbxs is None or self.latest_pp_bbxs is None: return {} - lastest_image = self.latest_image.copy() # Set up current time variables start = time.time() current_time = self.vehicle_interface.time() - if downsample: - lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) - else: - lidar_down = self.latest_lidar.copy() - if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time - # Ensure data/ exists and build timestamp - if self.save_data: - self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) - - if self.camera_front == False: - start = time.time() - undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) - end = time.time() - # print('-------processing time undistort_image---', end -start) - self.current_K = current_K - orig_H, orig_W = undistorted_img.shape[:2] - - # --- Begin modifications for three-angle detection --- - img_normal = undistorted_img - else: - img_normal = lastest_image.copy() - undistorted_img = lastest_image.copy() - orig_H, orig_W = lastest_image.shape[:2] - self.current_K = self.K - results_normal = self.detector(img_normal, conf=0.35, classes=[0]) - combined_boxes = [] - if not self.enable_tracking: - self.ped_counter = 0 - if self.orientation: - img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) - img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.1, classes=[0]) - results_right = self.detector(img_right, conf=0.1, classes=[0]) - boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] - boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] - for box in boxes_left: - cx, cy, w, h = box - new_cx = cy - new_cy = orig_W - 1 - cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.RIGHT)) - for box in boxes_right: - cx, cy, w, h = box - new_cx = orig_H - 1 - cy - new_cy = cx - combined_boxes.append((new_cx, new_cy, h, w, AgentActivityEnum.LEFT)) - - boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] - for box in boxes_normal: - cx, cy, w, h = box - combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) - - # Visualize the received images in 2D with their corresponding labels - # It draws rectangles and labels on the images: - if getattr(self, 'visualize_2d', False): - for (cx, cy, w, h, activity) in combined_boxes: - left = int(cx - w / 2) - right = int(cx + w / 2) - top = int(cy - h / 2) - bottom = int(cy + h / 2) - if activity == AgentActivityEnum.STANDING: - color = (255, 0, 0) - label = "STANDING" - elif activity == AgentActivityEnum.RIGHT: - color = (0, 255, 0) - label = "RIGHT" - elif activity == AgentActivityEnum.LEFT: - color = (0, 0, 255) - label = "LEFT" - else: - color = (255, 255, 255) - label = "UNKNOWN" - cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) - cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), - cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) - cv2.imshow("Detection - Cone 2D", undistorted_img) - - start = time.time() - # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. 
- # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) - pts_cam = transform_points_l2c(lidar_down, self.T_l2c) - projected_pts = project_points(pts_cam, self.current_K, lidar_down) - # What is returned: - # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) - # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) - # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame - - # POINTPILLARS TIES IN HERE - pointpillars_detections = [] - # Get the LiDAR points (X, Y, Z) from the projected points - lidar_points = projected_pts[:, 2:5] # Lidar points are in the Lidar coordinate frame - - with torch.no_grad(): - # Convert to tensor and format for PointPillars - lidar_tensor = torch.from_numpy(lidar_points).float() - if torch.cuda.is_available(): - lidar_tensor = lidar_tensor.cuda() - - # Format as batch with a single point cloud - batched_pts = [lidar_tensor] - - # Get PointPillars predictions - results = self.pointpillars(batched_pts, mode='test') - - if results and len(results) > 0 and 'lidar_bboxes' in results[0]: - # Process PointPillars results - pp_bboxes = results[0]['lidar_bboxes'] - pp_labels = results[0]['labels'] - pp_scores = results[0]['scores'] - - for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): - # Skip low confidence detections - if score < 0.5: # Adjust threshold as needed - continue - - # Extract center position and dimensions - x, y, z, l, w, h, yaw = bbox - center = np.array([x, y, z]) - dims = (l, w, h) - - # Convert to vehicle coordinates - center_hom = np.append(center, 1) - center_vehicle_hom = self.T_l2v @ center_hom - center_vehicle = center_vehicle_hom[:3] - - # Create rotation matrix and convert to Euler angles - R_lidar = R.from_euler('z', yaw).as_matrix() - R_vehicle = self.T_l2v[:3, :3] @ R_lidar - veh_yaw, veh_pitch, veh_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - - # Transform to appropriate frame (START or CURRENT) - if self.use_start_frame: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, - vehicle.pose, - self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) - xp, yp, zp = (T_vehicle_to_start @ np.append(center_vehicle, 1))[:3] - out_frame = ObjectFrameEnum.START - else: - xp, yp, zp = center_vehicle - out_frame = ObjectFrameEnum.CURRENT - - # Determine agent activity based on label - # Map PointPillars class indices to your activities - if label == 0: # Adjust mapping as needed - activity = AgentActivityEnum.STANDING - elif label == 1: - activity = AgentActivityEnum.MOVING - else: - activity = AgentActivityEnum.UNKNOWN - - # Create pose and record detection - pp_pose = ObjectPose( - t=current_time, - x=xp, y=yp, z=zp, - yaw=veh_yaw, pitch=veh_pitch, roll=veh_roll, - frame=out_frame - ) - - pointpillars_detections.append({ - 'pose': pp_pose, - 'dimensions': dims, - 'activity': activity, - 'score': score - }) - - rospy.loginfo(f"PointPillars detected {len(pointpillars_detections)} objects") - - - end = time.time() - # print('-------processing time1---', end -start) - agents = {} + # TODO: Loop through bounding box lists here + # The bounding box lists that were matched up by self.synchronized_callback SHOULD match up + # correctly because we manually inserted the time stamp of the lidar data into the header + # of the bounding box list. 
So since ApproximateTimeSynchronizer syncs up messages which + # have similar time stamps (assumed to be determined by the time stamp in the message header), + # the bounding box lists being compared should be from the same point cloud data. The image + # data paired with it may be slightly different but since the bounding boxes from both models + # were built in 3D space using the lidar data, they should pair up well enough + + # The bounding boxes in both lists SHOULD ALREADY BE IN THE VEHICLE FRAME since we transformed + # the data from lidar->vehicle before creating the bounding boxes and then publishing. + + # To compare the bounding boxes in the lists, we can either use a 2D intersection over union birds + # eye view approach (since point pillars creates vertical pillars anyways) or we can do a 3D + # intersection over union. We could then merge the boxes that match closely by averaging + # their positions and dimensions and then we'd choose the label with the highest confidence. + + # For the leftover bounding boxes, we can still use them with their original confidence + # (confidence was placed in the value field of each box). + + # Finally, we would need to convert each box to an AgentState object + # We would then need to transform the AgentState object to the start frame to compare with old + # AgentState objects to assign id's and calculate velocity + # Then we would need to return the new list of AgentState objects - for i, box_info in enumerate(combined_boxes): - # Calculate the 2D bounding box in the image - cx, cy, w, h, activity = box_info - left = int(cx - w / 1.6) - right = int(cx + w / 1.6) - top = int(cy - h / 2) - bottom = int(cy + h / 2) - - # Cast rays from the camera through the 2D bounding box into 3D space and capture all LiDAR - # points that fall along those rays to get the 3D representation of the detected object: - mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ - (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) - roi_pts = projected_pts[mask] - - # Ignore regions of interest that don't have many points - if roi_pts.shape[0] < 5: - continue - - # Filter the points by distance and depth to remove outliers: - points_3d = roi_pts[:, 2:5] - points_3d = filter_points_within_threshold(points_3d, 40) - points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) - - # If enabled, use a cylindrical ROI around the mean point position, - # filter ground points, and depth points: - if self.use_cyl_roi: - global_filtered = filter_points_within_threshold(lidar_down, 30) - roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) - refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) - refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) - else: - refined_cluster = points_3d.copy() - # end1 = time.time() - if refined_cluster.shape[0] < 4: - continue - - # Create an Open3D point cloud with the filtered points and calculate an oriented - # bounding box to get the object's precise 3D position, dimensions, center, and - # orientation - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(refined_cluster) - obb = pcd.get_oriented_bounding_box() - refined_center = obb.center - dims = tuple(obb.extent) - R_lidar = obb.R.copy() - - # Transform the object's center position from LiDAR to vehicle coordinates - refined_center_hom = np.append(refined_center, 1) - 
refined_center_vehicle_hom = self.T_l2v @ refined_center_hom - refined_center_vehicle = refined_center_vehicle_hom[:3] - - # Transforms the object's rotation matrix and converts it to yaw/pitch/roll - R_vehicle = self.T_l2v[:3, :3] @ R_lidar - yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - refined_center = refined_center_vehicle - - if self.use_start_frame: - # Transform coordinates to the starting vehicle frame - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, - vehicle.pose, - self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) - xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] - out_frame = ObjectFrameEnum.START - else: - # Use the current vehicle frame - xp, yp, zp = refined_center - out_frame = ObjectFrameEnum.CURRENT - - new_pose = ObjectPose( - t=current_time, - x=xp, y=yp, z=zp, - yaw=yaw, pitch=pitch, roll=roll, - frame=out_frame - ) - - # --- Optional tracking --- - if self.enable_tracking: - existing_id = match_existing_cone( - np.array([new_pose.x, new_pose.y, new_pose.z]), - dims, - self.tracked_agents, - distance_threshold=2.0 - ) - if existing_id is not None: - old_state = self.tracked_agents[existing_id] - if vehicle.v < 100: - alpha = 0.1 - avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x - avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y - avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z - avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw - avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch - avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll - - updated_pose = ObjectPose( - t=new_pose.t, - x=avg_x, - y=avg_y, - z=avg_z, - yaw=avg_yaw, - pitch=avg_pitch, - roll=avg_roll, - frame=new_pose.frame - ) - updated_agent = AgentState( - pose=updated_pose, - dimensions=dims, - outline=None, - type=AgentEnum.CONE, - activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 - ) - else: - updated_agent = old_state - agents[existing_id] = updated_agent - self.tracked_agents[existing_id] = updated_agent - else: - agent_id = f"Cone{self.ped_counter}" - self.ped_counter += 1 - new_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.CONE, - activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 - ) - agents[agent_id] = new_agent - self.tracked_agents[agent_id] = new_agent - else: - agent_id = f"Cone{self.ped_counter}" - self.ped_counter += 1 - new_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.CONE, - activity=activity, - velocity=(0, 0, 0), - yaw_rate=0 - ) - agents[agent_id] = new_agent - - self.current_agents = agents - - # If tracking not enabled, return only current frame detections - if not self.enable_tracking: - for agent_id, agent in self.current_agents.items(): - p = agent.pose - rospy.loginfo( - f"Agent ID: {agent_id}\n" - f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " - f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" - ) - end = time.time() - # print('-------processing time', end -start) - return self.current_agents - - stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - agent.pose.t > 20.0] - for agent_id in stale_ids: - rospy.loginfo(f"Removing stale 
agent: {agent_id}\n") - del self.tracked_agents[agent_id] - if self.enable_tracking: - for agent_id, agent in self.tracked_agents.items(): - p = agent.pose - rospy.loginfo( - f"Agent ID: {agent_id}\n" - f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " - f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - f"type:{agent.activity}" - ) end = time.time() - # print('-------processing time', end -start) + # print('-------processing time---', end -start) return self.tracked_agents - def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: - os.makedirs("data", exist_ok=True) - tstamp = int(self.vehicle_interface.time() * 1000) - # 1) Dump raw image - cv2.imwrite(f"data/{tstamp}_image.png", lastest_image) - # 2) Dump raw LiDAR - np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) - # 3) Write BEFORE_TRANSFORM - with open(f"data/{tstamp}_vehstate.txt", "w") as f: - vp = vehicle.pose - f.write( - f"BEFORE_TRANSFORM " - f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " - f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" - ) - # Compute vehicle_start_pose in either START or CURRENT - if self.use_start_frame: - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, - vehicle.pose, - self.start_pose_abs - ) - mode = "START" - else: - vehicle_start_pose = vehicle.pose - mode = "CURRENT" - with open(f"data/{tstamp}_vehstate.txt", "a") as f: - f.write( - f"AFTER_TRANSFORM " - f"x={vehicle_start_pose.x:.3f}, " - f"y={vehicle_start_pose.y:.3f}, " - f"z={vehicle_start_pose.z:.3f}, " - f"yaw={vehicle_start_pose.yaw:.2f}, " - f"pitch={vehicle_start_pose.pitch:.2f}, " - f"roll={vehicle_start_pose.roll:.2f}, " - f"frame={mode}\n" - ) - # Fake 2D Combined Detector for testing purposes -class FakCombinedDetector(Component): +# TODO FIX THIS +class FakeCombinedDetector2D(Component): def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface self.times = [(5.0, 20.0), (30.0, 35.0)] diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml index a63bd86ac..8c8320992 100644 --- a/launch/combined_detection.yaml +++ b/launch/combined_detection.yaml @@ -15,15 +15,8 @@ drive: agent_detection : type: combined_detection.CombinedDetector3D args: - camera_name: front #[front, front_right] - camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - # optional overrides enable_tracking: True # True if you want to enable tracking function - visualize_2d: False # True if you see 2D detection visualization - use_cyl_roi: False # True if you want to use a cylinder roi (no need to use if calibration is accurate) - save_data: False # True if you want to save the sensor input data - orientation: True # True if you need to detect flipped cones use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame perception_normalization : StandardPerceptionNormalizer @@ -109,7 +102,7 @@ variants: description: "Run the yielding trajectory planner on the real vehicle with faked perception" drive: perception: - agent_detection : cone_detection.FakeConeDetector2D + agent_detection : cone_detection.FakeCombinedDetector2D fake_sim: run: From a889ccc4f81995ff6bb8eabf5499bb53a4024ed2 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 19:22:55 -0500 
Subject: [PATCH 20/55] Create PointPillars_ROS --- GEMstack/onboard/perception/sensorFusion/PointPillars_ROS | 1 + 1 file changed, 1 insertion(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/PointPillars_ROS diff --git a/GEMstack/onboard/perception/sensorFusion/PointPillars_ROS b/GEMstack/onboard/perception/sensorFusion/PointPillars_ROS new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/PointPillars_ROS @@ -0,0 +1 @@ + From 284e80a015311b632a331a651c0f116e0d58ee15 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 19:24:09 -0500 Subject: [PATCH 21/55] Create pointpillars_test.launch --- .../launch/pointpillars_test.launch | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch new file mode 100644 index 000000000..022ff77cb --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + From 6ceb3d5be9b33e6b5a2b6066ca0d78a1943d3dc8 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 19:24:52 -0500 Subject: [PATCH 22/55] pointpillars_ros_node.py updated K, D matrices --- .../scripts/pointpillars_ros_node.py | 351 ++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py new file mode 100644 index 000000000..c8d927899 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 + +## To run: +# Create Package: catkin_create_pkg pointpillars_ros rospy std_msgs sensor_msgs geometry_msgs visualization_msgs message_filters tf2_ros cv_bridge ros_numpy +# In launch/pointpillars_test.launch: Update Checkpoint_path and config_path. Set target_frame. +# chmod +x ~/pointpillars_ros/scripts/pointpillars_ros_node.py +# catkin_make +# source ~/catkin_ws/devel/setup.bash +# export ROS_PACKAGE_PATH=~Gem**/pointpillars_ros/../:$ROS_PACKAGE_PATH +# roslaunch pointpillars_ros pointpillars_test.launch (Make sure roscore and your data source (e.g., rosbag play) are running). +# Publish K and D on CameraInfo topic, and a working TF tree providing the transform between the LiDAR frame and the camera frame (self.target_frame). 
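As written, the node below actually reads K and D from the private parameters ~camera_K and ~camera_D rather than from a CameraInfo topic. A minimal sketch of providing those parameters ahead of launch; the node name, frame name, and calibration values here are placeholders rather than values from this repository, and ~checkpoint_path / ~config_path would normally be set alongside them in pointpillars_test.launch:

import rospy

rospy.init_node('set_pointpillars_params', anonymous=True)
# 3x3 intrinsics and distortion coefficients; replace the placeholder values with the real calibration.
rospy.set_param('/pointpillars_ros_node/camera_K', [[1000.0,    0.0, 640.0],
                                                    [   0.0, 1000.0, 360.0],
                                                    [   0.0,    0.0,   1.0]])
rospy.set_param('/pointpillars_ros_node/camera_D', [0.0, 0.0, 0.0, 0.0, 0.0])
rospy.set_param('/pointpillars_ros_node/target_frame', 'camera_front')  # assumed camera frame name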
+#!/usr/bin/env python3 + +#!/usr/bin/env python3 + +import rospy +import ros_numpy +import numpy as np +import torch +import cv2 +import os +import sys +import time + +###### ROS Message Imports +from sensor_msgs.msg import PointCloud2, Image # Removed CameraInfo +from visualization_msgs.msg import Marker, MarkerArray +from geometry_msgs.msg import Point + +###### Synchronization and TF Imports +import message_filters +import tf2_ros +import tf2_geometry_msgs +from tf.transformations import quaternion_from_euler + +###### Image Processing Import +from cv_bridge import CvBridge, CvBridgeError + +pointpillars_repo_path = '~/PointPillars' # <<< CHANGE THIS >>> + +if not os.path.isdir(pointpillars_repo_path): + try: import rospy; rospy.logfatal(f"PointPillars repository path not found: {pointpillars_repo_path}") + except ImportError: print(f"[FATAL] PointPillars repository path not found: {pointpillars_repo_path}") + sys.exit(1) +sys.path.insert(0, pointpillars_repo_path) + +###### Import necessary PointPillars components +try: + from pointpillars.model import PointPillars + from pointpillars.utils import setup_seed +except ImportError as e: + try: import rospy; rospy.logfatal(f"Failed to import PointPillars modules from {pointpillars_repo_path}. Error: {e}") + except ImportError: print(f"[FATAL] Failed to import PointPillars modules from {pointpillars_repo_path}. Error: {e}") + sys.exit(1) + + +class PointPillarsROS: + def __init__(self): + rospy.init_node('pointpillars_ros_node') + + ###### Load essential parameters + self.checkpoint_path = rospy.get_param('~checkpoint_path') + self.config_path = rospy.get_param('~config_path') # Still potentially needed for class names etc. + lidar_topic = rospy.get_param('~lidar_topic', '/ouster/points') + image_topic = rospy.get_param('~image_topic', '/oak/rgb/image_raw') + # Removed camera_info_topic parameter + self.score_threshold = rospy.get_param('~score_threshold', 0.3) + self.target_frame = rospy.get_param('~target_frame') # Must match child frame in TF publisher + + ###### Load Calibration K and D from Parameters + try: + k_list = rospy.get_param('~camera_K') + d_list = rospy.get_param('~camera_D') + + # Validate and convert K + if not isinstance(k_list, list) or len(k_list) != 3 or \ + not all(isinstance(row, list) and len(row) == 3 for row in k_list): + raise ValueError("Parameter '~camera_K' must be a 3x3 list of lists.") + self.camera_matrix = np.array(k_list, dtype=np.float64) # Use float64 for cv2 + + # Validate and convert D + if not isinstance(d_list, list): + raise ValueError("Parameter '~camera_D' must be a list.") + self.dist_coeffs = np.array(d_list, dtype=np.float64) # Use float64 for cv2 + + rospy.loginfo("Camera K and D matrices loaded from parameters.") + rospy.logdebug(f"K = {self.camera_matrix.tolist()}") + rospy.logdebug(f"D = {self.dist_coeffs.tolist()}") + + except Exception as e: + rospy.logfatal(f"Failed to load camera calibration K/D from parameters: {e}") + rospy.logfatal("Please ensure '~camera_K' (3x3 list) and '~camera_D' (list) parameters are set correctly in the launch file.") + sys.exit(1) + + ###### Load PointPillars configuration (might still need for class names etc.) 
+ try: + # If config isn't needed by model(), this might be simplified further + # depending on how class names are handled for visualization + from pointpillars.utils import load_config # Import here if needed + self.cfg = load_config(self.config_path) + rospy.loginfo(f"Loaded model config from {self.config_path}") + except FileNotFoundError: + # Config might be optional if class names handled differently + rospy.logwarn(f"Model config file not found: {self.config_path}. Class names might not be available.") + self.cfg = {} # Use empty dict if config optional/missing + except Exception as e: + rospy.logwarn(f"Error loading config {self.config_path}: {e}. Class names might not be available.") + self.cfg = {} + + ###### Setup Seed + setup_seed(self.cfg.get('seed', 0)) # Use seed from config or default + + ###### Define Point Cloud Ranges (from test.py) + self.pcd_limit_range = np.array([0, -39.68, -3, 69.12, 39.68, 1], dtype=np.float32) + self.pcd_post_limit_range = np.array([0, -40, -3, 70.4, 40, 0.0], dtype=np.float32) + + ###### Determine number of classes + n_classes = self.cfg.get('nclasses', 3) # Get from config or default to 3 + rospy.loginfo(f"Initializing PointPillars model for {n_classes} classes.") + + ###### Setup device and load model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = PointPillars(nclasses=n_classes).to(self.device) + + if not os.path.exists(self.checkpoint_path): + rospy.logfatal(f"Checkpoint file not found: {self.checkpoint_path}") + sys.exit(1) + try: + map_location = self.device if not torch.cuda.is_available() else None + checkpoint = torch.load(self.checkpoint_path, map_location=map_location) + state_dict = checkpoint.get('model_state_dict', checkpoint) + self.model.load_state_dict(state_dict) + self.model.eval() + rospy.loginfo(f"PointPillars model loaded successfully from {self.checkpoint_path} on device {self.device}.") + except Exception as e: + rospy.logfatal(f"Error loading model checkpoint: {e}", exc_info=True) + sys.exit(1) + + ###### Initialize CV Bridge + self.cv_bridge = CvBridge() + + ###### TF Listener Setup + self.tf_buffer = tf2_ros.Buffer() + self.tf_listener = tf2_ros.TransformListener(self.tf_buffer) + + ###### Publisher Setup + self.image_pub = rospy.Publisher("~output/image_annotated", Image, queue_size=2) + self.marker_pub = rospy.Publisher("~output/bounding_boxes", MarkerArray, queue_size=5) + + ###### Subscriber and Synchronizer Setup + lidar_sub = message_filters.Subscriber(lidar_topic, PointCloud2) + image_sub = message_filters.Subscriber(image_topic, Image) + self.sync = message_filters.ApproximateTimeSynchronizer( + [lidar_sub, image_sub], queue_size=15, slop=0.1) + self.sync.registerCallback(self.callback) + + rospy.loginfo("PointPillars ROS node initialized and ready.") + + + def point_range_filter(self, pts, point_range): + ###### Filter points outside the specified range + keep_mask = (pts[:, 0] > point_range[0]) & \ + (pts[:, 1] > point_range[1]) & \ + (pts[:, 2] > point_range[2]) & \ + (pts[:, 0] < point_range[3]) & \ + (pts[:, 1] < point_range[4]) & \ + (pts[:, 2] < point_range[5]) + return pts[keep_mask] + + # Removed process_camera_info callback + + def get_transform(self, target_frame, source_frame, timestamp): + ###### Get TF transform + try: + return self.tf_buffer.lookup_transform(target_frame, source_frame, timestamp, rospy.Duration(0.1)) + except Exception as e: + rospy.logwarn_throttle(5.0, f"TF lookup failed from {source_frame} to {target_frame}: {e}") + return None + + 
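get_transform() above only succeeds if something is already broadcasting the LiDAR-to-camera transform. A minimal sketch of a static broadcaster for it; the frame names and the identity extrinsics are placeholders (per the ~target_frame comment above, the camera frame is the child frame of this publisher):

import rospy
import tf2_ros
from geometry_msgs.msg import TransformStamped

rospy.init_node('lidar_camera_static_tf')
broadcaster = tf2_ros.StaticTransformBroadcaster()

tf_msg = TransformStamped()
tf_msg.header.stamp = rospy.Time.now()
tf_msg.header.frame_id = 'os_sensor'     # assumed frame_id of the incoming PointCloud2 messages
tf_msg.child_frame_id = 'camera_front'   # must match the ~target_frame parameter
# Identity extrinsics as a placeholder; substitute the real lidar->camera calibration.
tf_msg.transform.rotation.w = 1.0
broadcaster.sendTransform(tf_msg)
rospy.spin()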
def callback(self, lidar_msg, image_msg): + start_time = time.time() + + ###### Convert Lidar + try: + pc_data = ros_numpy.point_cloud2.pointcloud2_to_array(lidar_msg) + if 'intensity' in pc_data.dtype.names: + points = np.vstack([pc_data['x'], pc_data['y'], pc_data['z'], pc_data['intensity']]).T + else: + rospy.logwarn_once("Lidar 'intensity' field not found. Using 0.") + points = np.vstack([pc_data['x'], pc_data['y'], pc_data['z'], np.zeros(len(pc_data))]).T + points = points.astype(np.float32) + except Exception as e: + rospy.logerr(f"Error converting PointCloud2: {e}") + return + + ###### Filter points by range + points_filtered = self.point_range_filter(points, self.pcd_limit_range) + if points_filtered.shape[0] == 0: + rospy.logdebug("No points left after range filtering.") + if self.marker_pub.get_num_connections() > 0: self.clear_markers(lidar_msg.header) + if self.image_pub.get_num_connections() > 0: self.image_pub.publish(image_msg) + return + + ###### Convert to Tensor + pc_torch = torch.from_numpy(points_filtered).to(self.device) + batched_pts = [pc_torch] + + ###### Run Inference + result_dict = {} + with torch.no_grad(): + try: + results = self.model(batched_pts=batched_pts, mode='test') + if isinstance(results, list) and len(results) > 0: + result_dict = results[0] + else: + rospy.logwarn(f"Unexpected model output format: {type(results)}. Expected list.") + except Exception as e: + rospy.logerr(f"Error during model inference: {e}", exc_info=True) + return # Exit callback on inference error + + ###### Extract Results + det_boxes_lidar = result_dict.get('lidar_bboxes', np.array([])) + det_labels = result_dict.get('labels', np.array([])) + det_scores = result_dict.get('scores', np.array([])) + + ###### Apply Score Thresholding & Lidar Range Filtering + if det_boxes_lidar.shape[0] > 0: + score_mask = det_scores >= self.score_threshold + det_boxes_lidar = det_boxes_lidar[score_mask] + det_labels = det_labels[score_mask] + det_scores = det_scores[score_mask] + + if det_boxes_lidar.shape[0] > 0: + center_x = det_boxes_lidar[:, 0]; center_y = det_boxes_lidar[:, 1]; center_z = det_boxes_lidar[:, 2] + range_mask = (center_x >= self.pcd_post_limit_range[0]) & (center_y >= self.pcd_post_limit_range[1]) & \ + (center_z >= self.pcd_post_limit_range[2]) & (center_x < self.pcd_post_limit_range[3]) & \ + (center_y < self.pcd_post_limit_range[4]) & (center_z < self.pcd_post_limit_range[5]) + det_boxes_lidar = det_boxes_lidar[range_mask] + det_labels = det_labels[range_mask] + det_scores = det_scores[range_mask] + + ###### Get Transform for Visualization + # target_frame now comes directly from param, MUST match static transform publisher child frame + transform_lidar_to_target = self.get_transform(self.target_frame, lidar_msg.header.frame_id, lidar_msg.header.stamp) + + ###### Visualization + num_detections = len(det_boxes_lidar) + cv_image = None + try: + cv_image = self.cv_bridge.imgmsg_to_cv2(image_msg, desired_encoding="bgr8") + except CvBridgeError as e: + rospy.logerr(f"Error converting Image: {e}") + + if num_detections > 0: + rospy.logdebug(f"Detected {num_detections} objects after filtering.") + if self.marker_pub.get_num_connections() > 0: + self.publish_markers(det_boxes_lidar, lidar_msg.header, det_labels, det_scores) + + # Use K, D loaded from params for projection + if self.image_pub.get_num_connections() > 0 and cv_image is not None: + if self.camera_matrix is not None and self.dist_coeffs is not None and transform_lidar_to_target is not None: + annotated_image = 
self.project_and_draw_boxes_on_image( + cv_image.copy(), det_boxes_lidar, transform_lidar_to_target, + self.camera_matrix, self.dist_coeffs, det_labels, det_scores) # Pass K, D from params + try: + img_msg_out = self.cv_bridge.cv2_to_imgmsg(annotated_image, encoding="bgr8") + img_msg_out.header = image_msg.header + self.image_pub.publish(img_msg_out) + except CvBridgeError as e: + rospy.logerr(f"Error converting annotated image to message: {e}") + else: + rospy.logwarn_throttle(5.0, "Cannot annotate image: Missing TF transform (K/D loaded from params).") + if cv_image is not None: self.image_pub.publish(image_msg) # Publish original + else: + rospy.logdebug("No detections passed filters.") + if self.marker_pub.get_num_connections() > 0: self.clear_markers(lidar_msg.header) + if self.image_pub.get_num_connections() > 0 and cv_image is not None: self.image_pub.publish(image_msg) # Publish original + + processing_time = time.time() - start_time + rospy.logdebug(f"Processed frame in {processing_time:.4f}s ({(1.0/processing_time if processing_time > 0 else 0):.1f} Hz)") + + def project_and_draw_boxes_on_image(self, image, boxes_lidar, transform, K, D, labels=None, scores=None): + ###### Project 3D boxes to image plane and draw using OpenCV + if K is None or D is None or transform is None: return image + ###### Convert transform to 4x4 matrix + trans = transform.transform.translation; quat = transform.transform.rotation + tf_matrix = tf.transformations.quaternion_matrix([quat.x, quat.y, quat.z, quat.w]) + tf_matrix[0, 3] = trans.x; tf_matrix[1, 3] = trans.y; tf_matrix[2, 3] = trans.z + ###### Box corners (unit cube) and edges + corners_norm = np.array([[ .5, .5, .5],[ .5, .5,-.5],[ .5,-.5,-.5],[ .5,-.5, .5], + [-.5, .5, .5],[-.5, .5,-.5],[-.5,-.5,-.5],[-.5,-.5, .5]]) + edges = [(0,1),(1,2),(2,3),(3,0),(4,5),(5,6),(6,7),(7,4),(0,4),(1,5),(2,6),(3,7)] + + for i, box in enumerate(boxes_lidar): + x, y, z, dx, dy, dz, yaw = box + if dx<=0 or dy<=0 or dz<=0: continue + ###### Calculate 8 corners in lidar frame + Rz = np.array([[np.cos(yaw),-np.sin(yaw),0],[np.sin(yaw),np.cos(yaw),0],[0,0,1]]) + corners = corners_norm * np.array([dx, dy, dz]); corners = corners @ Rz.T; corners += np.array([x, y, z]) + ###### Transform corners to camera frame + corners_lidar_h = np.hstack((corners, np.ones((8,1)))) + corners_cam_h = (tf_matrix @ corners_lidar_h.T).T; corners_cam = corners_cam_h[:,:3] + ###### Keep only points in front of the camera + front_mask = corners_cam[:, 2] > 0.1; + if not np.all(front_mask): continue + ###### Project to image plane using cv2.projectPoints + try: image_points, _ = cv2.projectPoints(corners_cam, np.zeros(3), np.zeros(3), K, D); image_points = image_points.reshape(-1, 2).astype(int) + except: continue # Ignore projection errors + ###### Draw edges + color = (0, 255, 0); thickness = 1 + for start_idx, end_idx in edges: cv2.line(image, tuple(image_points[start_idx]), tuple(image_points[end_idx]), color, thickness) + ###### Add label/score text (optional) + if labels is not None and scores is not None and i < len(labels) and i < len(scores): + try: + label_name = self.cfg.get('class_names', {}).get(int(labels[i]), f"L:{int(labels[i])}") # Use cfg if available, else basic + score = scores[i]; text = f"{label_name}:{score:.2f}" + text_pos = tuple(((image_points[0] + image_points[4]) // 2) + np.array([0,-5])) # Pos above top front center + cv2.putText(image, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, cv2.LINE_AA) + except: pass # Ignore text errors + return image + 
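The corner construction used above (unit-cube corners scaled by the box dimensions, rotated by yaw about z, then translated) can be sanity-checked in isolation. A minimal sketch, assuming the same (x, y, z, dx, dy, dz, yaw) LiDAR box convention:

import numpy as np

def box_corners_lidar(box):
    """Return the 8 corners of an (x, y, z, dx, dy, dz, yaw) box in the LiDAR frame."""
    x, y, z, dx, dy, dz, yaw = box
    corners_norm = np.array([[ .5, .5, .5], [ .5, .5, -.5], [ .5, -.5, -.5], [ .5, -.5, .5],
                             [-.5, .5, .5], [-.5, .5, -.5], [-.5, -.5, -.5], [-.5, -.5, .5]])
    Rz = np.array([[np.cos(yaw), -np.sin(yaw), 0.0],
                   [np.sin(yaw),  np.cos(yaw), 0.0],
                   [0.0,          0.0,         1.0]])
    return (corners_norm * np.array([dx, dy, dz])) @ Rz.T + np.array([x, y, z])

# An axis-aligned 2.0 x 1.0 x 1.8 box centred at (10, 0, 0) spans [9, 11] x [-0.5, 0.5] x [-0.9, 0.9].
corners = box_corners_lidar([10.0, 0.0, 0.0, 2.0, 1.0, 1.8, 0.0])
assert np.allclose(corners.min(axis=0), [9.0, -0.5, -0.9])
assert np.allclose(corners.max(axis=0), [11.0, 0.5, 0.9])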
+ def clear_markers(self, header): + marker_array = MarkerArray(); marker = Marker(); marker.header = header + marker.ns = "pointpillars_boxes"; marker.id = 0; marker.action = Marker.DELETEALL + marker_array.markers.append(marker); self.marker_pub.publish(marker_array) + + def publish_markers(self, boxes_lidar, header, labels=None, scores=None): + marker_array = MarkerArray(); marker_id = 0; default_lifetime = rospy.Duration(0.2) + for i, box in enumerate(boxes_lidar): + x,y,z,dx,dy,dz,yaw = box; label_id = int(labels[i]) if labels is not None else 0; score = scores[i] if scores is not None else 1.0 + marker = Marker(); marker.header = header; marker.ns = "pointpillars_boxes"; marker.id = marker_id; marker.type = Marker.CUBE; marker.action = Marker.ADD; marker.lifetime = default_lifetime + marker.pose.position.x=x; marker.pose.position.y=y; marker.pose.position.z=z + q = quaternion_from_euler(0,0,yaw); marker.pose.orientation.x=q[0]; marker.pose.orientation.y=q[1]; marker.pose.orientation.z=q[2]; marker.pose.orientation.w=q[3] + marker.scale.x=max(dx,0.01); marker.scale.y=max(dy,0.01); marker.scale.z=max(dz,0.01); marker.color.r=0.0; marker.color.g=1.0; marker.color.b=0.0; marker.color.a=0.4 + marker_array.markers.append(marker); marker_id += 1 + text_marker = Marker(); text_marker.header = header; text_marker.ns = "pointpillars_labels"; text_marker.id = marker_id; text_marker.type = Marker.TEXT_VIEW_FACING; text_marker.action = Marker.ADD; text_marker.lifetime = default_lifetime + text_marker.pose.position.x=x; text_marker.pose.position.y=y; text_marker.pose.position.z = z + dz/2.0 + 0.3; text_marker.scale.z = 0.4; text_marker.color.r=1.0; text_marker.color.g=1.0; text_marker.color.b=1.0; text_marker.color.a=0.9 + label_name = self.cfg.get('class_names', {}).get(label_id, f"L:{label_id}") # Use cfg if available + text_marker.text = f"{label_name} ({score:.2f})" + marker_array.markers.append(text_marker); marker_id += 1 + if marker_array.markers: self.marker_pub.publish(marker_array) + +if __name__ == '__main__': + try: node = PointPillarsROS(); rospy.spin() + except rospy.ROSInterruptException: rospy.loginfo("PointPillars ROS node shutting down.") + except Exception as e: rospy.logfatal(f"Unhandled exception: {e}", exc_info=True) + finally: + if 'node' in locals() and hasattr(node, 'device') and node.device == torch.device("cuda"): + try: import torch.cuda; torch.cuda.empty_cache(); rospy.loginfo("Cleared CUDA cache.") + except: pass + rospy.loginfo("PointPillars ROS node finished.") From 04ddc1d814a608f0532bd4c72c3c4619c5a9e06b Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 19:25:45 -0500 Subject: [PATCH 23/55] Add CmakeLists.txt --- .../pointpillars_ros/CmakeLists.txt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt new file mode 100644 index 000000000..6b9ce3138 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.0.2) +project(pointpillars_ros) + +## Find required ROS packages +find_package(catkin REQUIRED COMPONENTS + cv_bridge + geometry_msgs + message_filters + ros_numpy + rospy + sensor_msgs + std_msgs + tf2_ros + visualization_msgs +) + +## Declare Python scripts 
installation +catkin_install_python(PROGRAMS + scripts/pointpillars_ros_node.py + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +## Declare launch files installation +install(DIRECTORY launch/ + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch +) From 75dfafbb80d6c0a1432062e057bc014131895984 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 19:26:47 -0500 Subject: [PATCH 24/55] Add package.xml --- .../sensorFusion/pointpillars_ros/package.xml | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml new file mode 100644 index 000000000..06f2727ad --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml @@ -0,0 +1,21 @@ + + + pointpillars_ros + 0.1.0 + ROS wrapper for PointPillars 3D object detection. + Your Name Apache-2.0 catkin + + rospy + std_msgs + sensor_msgs + geometry_msgs + visualization_msgs + message_filters + tf2_ros + cv_bridge + ros_numpy + + + + + From cfd3994bd0eda33ebb8f475c905deaa0a3454927 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Sun, 4 May 2025 20:15:02 -0500 Subject: [PATCH 25/55] combined_detection.py add merge, fusing 3d bboxes --- .../onboard/perception/combined_detection.py | 261 +++++++++++++----- 1 file changed, 195 insertions(+), 66 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 958d5b274..b5880f8f0 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -8,47 +8,72 @@ import time import os import yaml +from typing import Dict, List, Optional, Tuple +import numpy as np +from scipy.spatial.transform import Rotation as R + from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray -class CombinedDetector3D(Component): - """ - Fuses the boxes in the lists of bounding boxes published by YoloNode and - PointPillarsNode with late sensor fusion. - TODO: SUBSCRIBE TO BOUNDING BOX LISTS AND PERFORM LATE SENSOR FUSION IN THIS FILE. - TODO: MODIFY YAML FILE FOR THE CONTROL TEAM'S BASIC PATH PLANNING CODE? +# TODO: Import IOU from IOU funcs in SensorFusion? + +# Reuse eval funcs? 
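As a reference for what the placeholder just below is meant to return, here is a minimal axis-aligned sketch of 3D IoU between two jsk_recognition_msgs BoundingBox messages; it ignores yaw and assumes pose.position is the box centre. The shared evaluation helper is wired in later in this series.

from jsk_recognition_msgs.msg import BoundingBox  # repeated here so the sketch stands alone

def aabb_iou_3d(box1: BoundingBox, box2: BoundingBox) -> float:
    """Axis-aligned 3D IoU between two BoundingBox messages (yaw ignored)."""
    def bounds(b):
        c, d = b.pose.position, b.dimensions
        return (c.x - d.x / 2, c.x + d.x / 2,
                c.y - d.y / 2, c.y + d.y / 2,
                c.z - d.z / 2, c.z + d.z / 2)
    x1a, x1b, y1a, y1b, z1a, z1b = bounds(box1)
    x2a, x2b, y2a, y2b, z2a, z2b = bounds(box2)
    inter = (max(0.0, min(x1b, x2b) - max(x1a, x2a)) *
             max(0.0, min(y1b, y2b) - max(y1a, y2a)) *
             max(0.0, min(z1b, z2b) - max(z1a, z2a)))
    vol1 = box1.dimensions.x * box1.dimensions.y * box1.dimensions.z
    vol2 = box2.dimensions.x * box2.dimensions.y * box2.dimensions.z
    union = max(vol1 + vol2 - inter, 1e-6)
    return inter / union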
+def calculate_3d_iou(box1: BoundingBox, box2: BoundingBox) -> float: + return 0.0 + +def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: + # TODO: merging + # Heuristics- Average pose + # Average dimensions + # Use highest score + # Label specific logic + merged_box = BoundingBox() + merged_box.header = box1.header # Use header from one input + + ## Avg position, average dimensions, max score, box1 label + merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 + merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 + merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0 + # Avg orientation (quaternions) + merged_box.pose.orientation = box1.pose.orientation + merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 + merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 + merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0 + merged_box.value = max(box1.value, box2.value) # Max score + merged_box.label = box1.label # Label from first box - Tracking is optional: set `enable_tracking=False` to disable persistent tracking - and return only detections from the current frame. + return merged_box - Supports multiple cameras; each camera’s intrinsics and extrinsics are - loaded from a single YAML calibration file via plain PyYAML. - """ +class CombinedDetector3D(Component): def __init__( self, vehicle_interface: GEMInterface, enable_tracking: bool = True, use_start_frame: bool = True, - **kwargs + iou_threshold: float = 0.1, + **kwargs ): - # Core interfaces and state self.vehicle_interface = vehicle_interface - self.current_agents = {} - self.tracked_agents = {} + self.tracked_agents: Dict[str, AgentState] = {} self.ped_counter = 0 - self.latest_yolo_bbxs = None # Stores the latest list of YOLO bounding boxes - self.latest_pp_bbxs = None # Stores the latest list of PointPillars bounding boxes - self.start_pose_abs = None - self.start_time = None + self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None + self.latest_pp_bbxs: Optional[BoundingBoxArray] = None + self.start_pose_abs: Optional[ObjectPose] = None + self.start_time: Optional[float] = None - # Config flags self.enable_tracking = enable_tracking self.use_start_frame = use_start_frame + self.iou_threshold = iou_threshold + + self.yolo_topic = "/yolo_boxes" + self.pp_topic = "/pointpillars_boxes" + + rospy.loginfo(f"CombinedDetector3D Initialized. 
Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.") def rate(self) -> float: - return 8 + return 8.0 def state_inputs(self) -> list: return ['vehicle'] @@ -57,61 +82,165 @@ def state_outputs(self) -> list: return ['agents'] def initialize(self): - # Subscribe to the BoundingBox - self.yolo_sub = Subscriber('/yolo_boxes', BoundingBoxArray) - self.pp_sub = Subscriber('/pointpillars_boxes', BoundingBoxArray) - self.sync = ApproximateTimeSynchronizer([ - self.yolo_sub, self.pp_sub - ], queue_size=50, slop=0.05) # GREATLY DECREASED QUEUE SIZE, 50 might even be too much + self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray) + self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray) + + queue_size = 10 + slop = 0.1 + + self.sync = ApproximateTimeSynchronizer( + [self.yolo_sub, self.pp_sub], + queue_size=queue_size, + slop=slop + ) self.sync.registerCallback(self.synchronized_callback) + rospy.loginfo("CombinedDetector3D Subscribers Initialized.") - def synchronized_callback(self, yolo_bbxs_msg, pp_bbxs_msg): + def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray): self.latest_yolo_bbxs = yolo_bbxs_msg self.latest_pp_bbxs = pp_bbxs_msg - def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: - # Gate guards against data not being present for both sensors: - if self.latest_yolo_bbxs is None or self.latest_pp_bbxs is None: - return {} - - # Set up current time variables - start = time.time() + def update(self, state: AllState) -> Dict[str, AgentState]: + vehicle = state.vehicle current_time = self.vehicle_interface.time() + yolo_bbx_array = self.latest_yolo_bbxs + pp_bbx_array = self.latest_pp_bbxs + + if yolo_bbx_array is None or pp_bbx_array is None: + return {} + if self.start_time is None: self.start_time = current_time - time_elapsed = current_time - self.start_time - - agents = {} - # TODO: Loop through bounding box lists here - # The bounding box lists that were matched up by self.synchronized_callback SHOULD match up - # correctly because we manually inserted the time stamp of the lidar data into the header - # of the bounding box list. So since ApproximateTimeSynchronizer syncs up messages which - # have similar time stamps (assumed to be determined by the time stamp in the message header), - # the bounding box lists being compared should be from the same point cloud data. The image - # data paired with it may be slightly different but since the bounding boxes from both models - # were built in 3D space using the lidar data, they should pair up well enough - - # The bounding boxes in both lists SHOULD ALREADY BE IN THE VEHICLE FRAME since we transformed - # the data from lidar->vehicle before creating the bounding boxes and then publishing. - - # To compare the bounding boxes in the lists, we can either use a 2D intersection over union birds - # eye view approach (since point pillars creates vertical pillars anyways) or we can do a 3D - # intersection over union. We could then merge the boxes that match closely by averaging - # their positions and dimensions and then we'd choose the label with the highest confidence. - - # For the leftover bounding boxes, we can still use them with their original confidence - # (confidence was placed in the value field of each box). 
- - # Finally, we would need to convert each box to an AgentState object - # We would then need to transform the AgentState object to the start frame to compare with old - # AgentState objects to assign id's and calculate velocity - # Then we would need to return the new list of AgentState objects - - end = time.time() - # print('-------processing time---', end -start) + if self.use_start_frame and self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + rospy.loginfo("CombinedDetector3D latched start pose.") + + current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) + + if self.enable_tracking: + self._update_tracking(current_frame_agents) + else: + self.tracked_agents = current_frame_agents # NOTE: No deepcopy + return self.tracked_agents + + def _fuse_bounding_boxes(self, + yolo_bbx_array: BoundingBoxArray, + pp_bbx_array: BoundingBoxArray, + vehicle_state: VehicleState, + current_time: float + ) -> Dict[str, AgentState]: + current_agents_in_frame: Dict[str, AgentState] = {} + yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes + pp_boxes: List[BoundingBox] = pp_bbx_array.boxes + + output_frame_enum = ObjectFrameEnum.START if self.use_start_frame else ObjectFrameEnum.CURRENT + + matched_yolo_indices = set() + matched_pp_indices = set() + fused_boxes_list: List[BoundingBox] = [] + + # Can optimize from NxM loop + for i, yolo_box in enumerate(yolo_boxes): + best_match_j = -1 + best_iou = -1.0 + for j, pp_box in enumerate(pp_boxes): + if j in matched_pp_indices: # Skip already matched PP boxes + continue + + ## IoU + iou = calculate_3d_iou(yolo_box, pp_box) + + if iou > self.iou_threshold and iou > best_iou: + best_iou = iou + best_match_j = j + + if best_match_j != -1: + rospy.logdebug(f"Matched YOLO box {i} with PP box {best_match_j} (IoU: {best_iou:.3f})") + matched_yolo_indices.add(i) + matched_pp_indices.add(best_match_j) + merged = merge_boxes(yolo_box, pp_boxes[best_match_j]) + fused_boxes_list.append(merged) + + ## Unmatched Bboxes + for i, yolo_box in enumerate(yolo_boxes): + if i not in matched_yolo_indices: + fused_boxes_list.append(yolo_box) + rospy.logdebug(f"Kept unmatched YOLO box {i}") + + for j, pp_box in enumerate(pp_boxes): + if j not in matched_pp_indices: + fused_boxes_list.append(pp_box) + rospy.logdebug(f"Kept unmatched PP box {j}") + + # Agenstate + for i, box in enumerate(fused_boxes_list): + try: + # Cur vehicle frame + pos_x = box.pose.position.x; pos_y = box.pose.position.y; pos_z = box.pose.position.z + quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w + yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) + + # Start frame + if self.use_start_frame and self.start_pose_abs is not None: + vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( + ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] + else: + final_x, final_y, final_z = pos_x, pos_y, pos_z + + final_pose = ObjectPose( + t=current_time, x=final_x, y=final_y, z=final_z, + yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum + ) + dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) + ######### Mapping based on label (integer) from 
BoundingBox msg + agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN # Needs refinement + activity = AgentActivityEnum.UNKNOWN # Placeholder + + # temp id + # _update_tracking assign persistent IDs + temp_agent_id = f"FrameDet_{i}" + + current_agents_in_frame[temp_agent_id] = AgentState( + pose=final_pose, dimensions=dims, outline=None, type=agent_type, + activity=activity, velocity=(0.0,0.0,0.0), yaw_rate=0.0 + # score=box.value # score + ) + except Exception as e: + rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") + continue + + return current_agents_in_frame + + + def _update_tracking(self, current_frame_agents: Dict[str, AgentState]): + + # Todo tracking + ## Match 'current_frame_agents' to 'self.tracked_agents'. + ## - Use position (already in correct START or CURRENT frame), maybe size/type. + ## - Need a matching algorithm (e.g., nearest neighbor within radius, Hungarian). + ## For matched pairs: + ## - Update the existing agent in 'self.tracked_agents' (e.g., smooth pose, update timestamp). + ## For unmatched 'current_frame_agents': + ## - These are new detections. Assign a persistent ID (e.g., f"Ped_{self.ped_counter}"). + ## - Increment self.ped_counter. + ## - Add them to 'self.tracked_agents'. + ## For unmatched 'self.tracked_agents' (agents not seen this frame): + ## - Increment a 'missed frames' counter or check timestamp. + ## - If missed for too long (e.g., > 1 second), remove from 'self.tracked_agents'. + + # return without tracking + self.tracked_agents = current_frame_agents + + + # Fake 2D Combined Detector for testing purposes # TODO FIX THIS class FakeCombinedDetector2D(Component): @@ -151,4 +280,4 @@ def box_to_fake_agent(box): if __name__ == '__main__': - pass \ No newline at end of file + pass From 93d44e249c8a2c9e43a215fa9a303e0dcdeff714 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Mon, 5 May 2025 12:03:27 -0500 Subject: [PATCH 26/55] Added 3d IOU calculation to combined_detection.py --- .../onboard/perception/combined_detection.py | 28 +++++++++------ .../sensorFusion/eval_3d_bbox_performance.py | 36 +++++++++++-------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index b5880f8f0..608a7b66b 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -11,16 +11,10 @@ from typing import Dict, List, Optional, Tuple import numpy as np from scipy.spatial.transform import Rotation as R - - from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from .sensorFusion.eval_3d_bbox_performance import calculate_3d_iou -# TODO: Import IOU from IOU funcs in SensorFusion? - -# Reuse eval funcs? -def calculate_3d_iou(box1: BoundingBox, box2: BoundingBox) -> float: - return 0.0 def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: # TODO: merging @@ -45,6 +39,21 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: return merged_box +def get_aabb_corners(box: BoundingBox): + """ + Calculates the 3D Intersection over Union (IoU) between two bounding boxes. + This implementation uses axis-aligned bounding boxes so it does not consider rotation. 
+ """ + + # Extract position and dimensions from each box + cx, cy, cz = box.pose.position.x, box.pose.position.y, box.pose.position.z + l, w, h = box.dimensions.x, box.dimensions.y, box.dimensions.z + + # min_x, max_x, min_y, max_y, min_z, max_z + return cx, cx + l, cy, cy + w, cz, cz + h + +def get_volume(box): + return box.dimensions.x * box.dimensions.y * box.dimensions.z class CombinedDetector3D(Component): def __init__( @@ -100,8 +109,7 @@ def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: Bo self.latest_yolo_bbxs = yolo_bbxs_msg self.latest_pp_bbxs = pp_bbxs_msg - def update(self, state: AllState) -> Dict[str, AgentState]: - vehicle = state.vehicle + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: current_time = self.vehicle_interface.time() yolo_bbx_array = self.latest_yolo_bbxs @@ -151,7 +159,7 @@ def _fuse_bounding_boxes(self, continue ## IoU - iou = calculate_3d_iou(yolo_box, pp_box) + iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume) if iou > self.iou_threshold and iou > best_iou: best_iou = iou diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py index 58b14cb92..49d238073 100644 --- a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py +++ b/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py @@ -110,7 +110,19 @@ def load_labels(file_path, is_gt=False): traceback.print_exc() return boxes -def calculate_3d_iou(box1, box2): +def get_aabb_corners(box): + # Uses internal format where cy is geometric center + cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H] + # Ignores yaw + min_x, max_x = cx - l / 2, cx + l / 2 + min_y, max_y = cy - h / 2, cy + h / 2 # Uses geometric center cy + min_z, max_z = cz - w / 2, cz + w / 2 # Assuming cz is center z (depth) + return min_x, max_x, min_y, max_y, min_z, max_z + +def get_volume(box): + return box[IDX_L] * box[IDX_W] * box[IDX_H] + +def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb): """ Calculates the 3D Intersection over Union (IoU) between two bounding boxes. @@ -118,6 +130,9 @@ def calculate_3d_iou(box1, box2): box1, box2: List or tuple representing a 3D bounding box in the *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...] where cy is the geometric center y. + OR box1 and box2 are BoundingBox objects + get_corners_cb is a callback that is used to extract the parameters + from the box1 and box2 arguments Returns: float: The 3D IoU value. 
@@ -126,17 +141,8 @@ def calculate_3d_iou(box1, box2): """ ####### Simple Axis-Aligned Bounding Box (AABB) IoU ####### - def get_aabb_corners(box): - # Uses internal format where cy is geometric center - cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H] - # Ignores yaw - min_x, max_x = cx - l / 2, cx + l / 2 - min_y, max_y = cy - h / 2, cy + h / 2 # Uses geometric center cy - min_z, max_z = cz - w / 2, cz + w / 2 # Assuming cz is center z (depth) - return min_x, max_x, min_y, max_y, min_z, max_z - - min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_aabb_corners(box1) - min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_aabb_corners(box2) + min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1) + min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2) # Calculate intersection volume inter_min_x = max(min_x1, min_x2) @@ -152,8 +158,8 @@ def get_aabb_corners(box): intersection_volume = inter_l * inter_h * inter_w # Calculate union volume - vol1 = box1[IDX_L] * box1[IDX_W] * box1[IDX_H] - vol2 = box2[IDX_L] * box2[IDX_W] * box2[IDX_H] + vol1 = get_volume_cb(box1) + vol2 = get_volume_cb(box2) # Ensure volumes are positive and non-zero for stable IoU vol1 = max(vol1, 1e-6) vol2 = max(vol2, 1e-6) @@ -272,7 +278,7 @@ def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou for gt_idx, gt_box in enumerate(gt_boxes): # Explicitly check class match (belt-and-suspenders) if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: - iou = calculate_3d_iou(pred_box, gt_box) + iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners) if iou > best_iou: best_iou = iou best_gt_idx = gt_idx From 3574a80ed137e4c2685b01b417d48d1479d51748 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 7 May 2025 12:14:39 -0500 Subject: [PATCH 27/55] Fixed some run commands in the README file --- GEMstack/onboard/perception/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md index 59095867c..d112481c3 100644 --- a/GEMstack/onboard/perception/README.md +++ b/GEMstack/onboard/perception/README.md @@ -40,9 +40,10 @@ $ source ~/catkin_ws/devel/setup.bash 1. Install Docker 2. Install Docker Compose 3. A bash script was created to handle docker permissions issues and make the set up process simpler: -$ bash GEMstack/onboard/perception/build_point_pillars.sh +$ cd GEMstack/onboard/perception +$ bash build_point_pillars.sh 4. Start the container (use sudo if you run into permissions issues) -$ docker compose -f GEMstack/onboard/perception/setup/docker-compose.yaml up +$ docker compose -f setup/docker-compose.yaml up 5. Run roscore on local machine (make sure you source first) $ roscore 6. Start up YOLO node (make sure you source first): From 914b109d439777c3825ecd8538fb450aa887b699 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 7 May 2025 12:16:08 -0500 Subject: [PATCH 28/55] Fixed a MAJOR problem that caused the detections to be extremely off. 
PointPillars uses reflectivity NOT intensity and points are now masked to the trained x, y, and z bounds of the pretrained weights --- .../onboard/perception/point_pillars_node.py | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 954a1e5bd..5f10ee1c3 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -10,6 +10,7 @@ from cv_bridge import CvBridge import time import os +import ros_numpy # PointPillars imports: import torch @@ -20,24 +21,41 @@ from geometry_msgs.msg import Pose, Vector3 -import numpy as np -import ros_numpy def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): """ Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. + This function extracts the x, y, z coordinates and reflecivity from the point cloud. """ # Convert the ROS message to a numpy structured array pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Stack x,y,z fields to a (N,3) array + reflectivity = np.array(pc['reflectivity']).ravel() + + # Normalize reflectivity to 0-1 range + reflectivity_max = np.max(reflectivity) + if reflectivity_max > 1.0: + normalized_reflectivity = reflectivity / reflectivity_max + else: + normalized_reflectivity = reflectivity + + # Stack x,y,z, r fields to a (N,4) array pts = np.stack((np.array(pc['x']).ravel(), np.array(pc['y']).ravel(), np.array(pc['z']).ravel(), - np.array(pc['intensity']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z in a specified range) - mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + normalized_reflectivity), + axis=1) + + # Restrict points to the model's default range: + x_min, y_min, z_min = 0, -39.68, -3 + x_max, y_max, z_max = 69.12, 39.68, 1 + + mask = ( + (pts[:, 0] >= x_min) & (pts[:, 0] <= x_max) & + (pts[:, 1] >= y_min) & (pts[:, 1] <= y_max) & + (pts[:, 2] >= z_min) & (pts[:, 2] <= z_max) + ) return pts[mask] + class PointPillarsNode(): """ Detects Pedestrians with PointPillars and publishes the results in vehicle frame. 
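This commit swaps the intensity channel for reflectivity and crops the cloud to the x/y/z range the pretrained PointPillars weights were trained on (x in [0, 69.12], y in [-39.68, 39.68], z in [-3, 1], matching the bounds set in the hunk above). The sketch below reproduces that preprocessing on a small synthetic array so it can be checked offline; the function and variable names are illustrative, and the max-based reflectivity scaling mirrors this hunk rather than assuming a fixed 8-bit range.

import numpy as np

def preprocess_cloud(xyz, reflectivity,
                     x_range=(0.0, 69.12),
                     y_range=(-39.68, 39.68),
                     z_range=(-3.0, 1.0)):
    # Scale reflectivity into [0, 1] only if it is not already normalized.
    r = reflectivity.astype(np.float32)
    if r.max() > 1.0:
        r = r / r.max()
    pts = np.column_stack((xyz, r))  # (N, 4): x, y, z, reflectivity
    # Keep only points inside the range the pretrained model expects.
    mask = ((pts[:, 0] >= x_range[0]) & (pts[:, 0] <= x_range[1]) &
            (pts[:, 1] >= y_range[0]) & (pts[:, 1] <= y_range[1]) &
            (pts[:, 2] >= z_range[0]) & (pts[:, 2] <= z_range[1]))
    return pts[mask]

xyz = np.array([[10.0,  0.0, -1.0],   # inside the range  -> kept
                [-5.0,  0.0, -1.0],   # behind the sensor -> dropped
                [20.0, 50.0, -1.0]])  # outside y range   -> dropped
refl = np.array([200.0, 40.0, 10.0])
print(preprocess_cloud(xyz, refl).shape)  # (1, 4)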
@@ -52,6 +70,7 @@ def __init__( self.camera_name = 'front' self.camera_front = (self.camera_name=='front') self.score_threshold = 0.4 + self.debug = True self.initialize() def initialize(self): @@ -163,7 +182,7 @@ def synchronized_callback(self, image_msg, lidar_msg): batched_pts = [lidar_tensor] # Get PointPillars predictions - results = self.pointpillars(batched_pts, mode='test') + results = self.pointpillars.forward(batched_pts, mode='test') if results and len(results) > 0 and 'lidar_bboxes' in results[0]: # Process PointPillars results @@ -194,6 +213,20 @@ def synchronized_callback(self, image_msg, lidar_msg): box.header.frame_id = 'velodyne' box.header.stamp = lidar_msg.header.stamp + if self.debug: + print("X VEHICLE") + print(x_vehicle) + print("L") + print(l) + print("Y VEHICLE") + print(y_vehicle) + print("W") + print(w) + print("Z VEHICLE") + print(z_vehicle) + print("H") + print(h) + # Set the pose (position and orientation) box.pose.position.x = float(x_vehicle) box.pose.position.y = float(y_vehicle) From 86d804589864d2efa4c1ca08e22a4c8a2467d2d4 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 7 May 2025 12:23:42 -0500 Subject: [PATCH 29/55] Added some debug code for development purposes --- GEMstack/onboard/perception/combined_detection.py | 11 ++++++++++- GEMstack/onboard/perception/yolo_node.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 608a7b66b..cef09a2ef 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -78,6 +78,7 @@ def __init__( self.yolo_topic = "/yolo_boxes" self.pp_topic = "/pointpillars_boxes" + self.debug = False rospy.loginfo(f"CombinedDetector3D Initialized. 
Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.") @@ -93,6 +94,7 @@ def state_outputs(self) -> list: def initialize(self): self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray) self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray) + self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1) queue_size = 10 slop = 0.1 @@ -140,6 +142,7 @@ def _fuse_bounding_boxes(self, vehicle_state: VehicleState, current_time: float ) -> Dict[str, AgentState]: + original_header = yolo_bbx_array.header current_agents_in_frame: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes pp_boxes: List[BoundingBox] = pp_bbx_array.boxes @@ -183,7 +186,13 @@ def _fuse_bounding_boxes(self, fused_boxes_list.append(pp_box) rospy.logdebug(f"Kept unmatched PP box {j}") - # Agenstate + if self.debug: + # Work in progress to visualize combined results + fused_array = BoundingBoxArray() + fused_array.header = yolo_bbx_array.header + fused_array.boxes = fused_boxes_list + self.pub_fused.publish(fused_array) + for i, box in enumerate(fused_boxes_list): try: # Cur vehicle frame diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 747c71108..ea5cef968 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -253,6 +253,7 @@ def __init__( self.camera_name = 'front' self.camera_front = (self.camera_name=='front') self.score_threshold = 0.4 + self.debug = True self.initialize() def initialize(self): @@ -463,6 +464,20 @@ def synchronized_callback(self, image_msg, lidar_msg): box_msg.dimensions.x = float(dims[0]) # length box_msg.dimensions.y = float(dims[1]) # width box_msg.dimensions.z = float(dims[2]) # height + + if self.debug: + print("X") + print(center_vehicle[0]) + print("L") + print(dims[0]) + print("Y") + print(center_vehicle[1]) + print("W") + print(dims[1]) + print("Z") + print(center_vehicle[2]) + print("H") + print(dims[2]) # Add confidence score and label box_msg.value = float(conf_scores[i]) From 40106d6013f72f8a8d56075cf4188dce1b30086e Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 7 May 2025 13:57:09 -0500 Subject: [PATCH 30/55] Fixed tiny issue with reflectivity range --- .../onboard/perception/point_pillars_node.py | 49 ++++++++++++------- GEMstack/onboard/perception/yolo_node.py | 40 ++++++++++----- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 5f10ee1c3..6b24af507 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -29,13 +29,10 @@ def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): # Convert the ROS message to a numpy structured array pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) reflectivity = np.array(pc['reflectivity']).ravel() - + intensity = np.array(pc['intensity']).ravel() + # Normalize reflectivity to 0-1 range - reflectivity_max = np.max(reflectivity) - if reflectivity_max > 1.0: - normalized_reflectivity = reflectivity / reflectivity_max - else: - normalized_reflectivity = reflectivity + normalized_reflectivity = reflectivity / 255.0 # Stack x,y,z, r fields to a (N,4) array pts = np.stack((np.array(pc['x']).ravel(), @@ -198,6 +195,20 @@ def synchronized_callback(self, image_msg, lidar_msg): # Extract center position and dimensions x, y, z, l, w, h, yaw = bbox + if self.debug: + print("X LIDAR") + print(x) + print("L") + 
print(l) + print("Y LIDAR") + print(y) + print("W") + print(w) + print("Z LIDAR") + print(z) + print("H") + print(h) + # Transform from LiDAR to vehicle coordinates center_lidar = np.array([x, y, z, 1.0]) center_vehicle = self.T_l2v @ center_lidar @@ -213,19 +224,19 @@ def synchronized_callback(self, image_msg, lidar_msg): box.header.frame_id = 'velodyne' box.header.stamp = lidar_msg.header.stamp - if self.debug: - print("X VEHICLE") - print(x_vehicle) - print("L") - print(l) - print("Y VEHICLE") - print(y_vehicle) - print("W") - print(w) - print("Z VEHICLE") - print(z_vehicle) - print("H") - print(h) + # if self.debug: + # print("X VEHICLE") + # print(x_vehicle) + # print("L") + # print(l) + # print("Y VEHICLE") + # print(y_vehicle) + # print("W") + # print(w) + # print("Z VEHICLE") + # print(z_vehicle) + # print("H") + # print(h) # Set the pose (position and orientation) box.pose.position.x = float(x_vehicle) diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index ea5cef968..fa92c4251 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -433,6 +433,20 @@ def synchronized_callback(self, image_msg, lidar_msg): center = obb.center dims = tuple(obb.extent) R_lidar = obb.R.copy() + + if self.debug: + print("X") + print(center[0]) + print("L") + print(dims[0]) + print("Y") + print(center[1]) + print("W") + print(dims[1]) + print("Z") + print(center[2]) + print("H") + print(dims[2]) # Transform from LiDAR to vehicle coordinates center_hom = np.append(center, 1) @@ -465,19 +479,19 @@ def synchronized_callback(self, image_msg, lidar_msg): box_msg.dimensions.y = float(dims[1]) # width box_msg.dimensions.z = float(dims[2]) # height - if self.debug: - print("X") - print(center_vehicle[0]) - print("L") - print(dims[0]) - print("Y") - print(center_vehicle[1]) - print("W") - print(dims[1]) - print("Z") - print(center_vehicle[2]) - print("H") - print(dims[2]) + # if self.debug: + # print("X") + # print(center_vehicle[0]) + # print("L") + # print(dims[0]) + # print("Y") + # print(center_vehicle[1]) + # print("W") + # print(dims[1]) + # print("Z") + # print(center_vehicle[2]) + # print("H") + # print(dims[2]) # Add confidence score and label box_msg.value = float(conf_scores[i]) From fb356e50dea7b5fb77fed93b4d4620af9525fbf4 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 14:41:17 -0500 Subject: [PATCH 31/55] Added yolo and point_pillars bounding box visualization code. Fixed Weird yolo bounding box rotation issue --- GEMstack/onboard/perception/README.md | 12 +- .../onboard/perception/combined_detection.py | 4 +- .../onboard/perception/point_pillars_node.py | 33 +---- GEMstack/onboard/perception/yolo_node.py | 131 ++++++------------ 4 files changed, 60 insertions(+), 120 deletions(-) diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md index d112481c3..19dfca655 100644 --- a/GEMstack/onboard/perception/README.md +++ b/GEMstack/onboard/perception/README.md @@ -68,4 +68,14 @@ $ roslaunch basic_launch visualization.launch $ pip install 'ultralytics==8.1.5' 2. 
If you run into communication issues with ROS, please make sure you have sourced EVERY terminal window (except for docker window there's no need to): $ source /opt/ros/noetic/setup.bash -$ source ~/catkin_ws/devel/setup.bash \ No newline at end of file +$ source ~/catkin_ws/devel/setup.bash + +### Visualization Steps: +Please make sure you source each new terminal window after creating it (local source commands are below): +$ source /opt/ros/noetic/setup.bash +$ source ~/catkin_ws/devel/setup.bash + +1. Start rviz: +$ rviz +2. Publish a static transform from the map to visualize the published bounding box data: +$ rosrun tf2_ros static_transform_publisher 0 0 0 0 0 0 map currentVehicleFrame diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index cef09a2ef..3db197b09 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -153,6 +153,7 @@ def _fuse_bounding_boxes(self, matched_pp_indices = set() fused_boxes_list: List[BoundingBox] = [] + # Match the boxes # Can optimize from NxM loop for i, yolo_box in enumerate(yolo_boxes): best_match_j = -1 @@ -175,12 +176,13 @@ def _fuse_bounding_boxes(self, merged = merge_boxes(yolo_box, pp_boxes[best_match_j]) fused_boxes_list.append(merged) - ## Unmatched Bboxes + ## UAdd the unmatched YOLO boxes for i, yolo_box in enumerate(yolo_boxes): if i not in matched_yolo_indices: fused_boxes_list.append(yolo_box) rospy.logdebug(f"Kept unmatched YOLO box {i}") + # Add the unmatched PointPillars boxes for j, pp_box in enumerate(pp_boxes): if j not in matched_pp_indices: fused_boxes_list.append(pp_box) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 6b24af507..9ebd9e40d 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -150,7 +150,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) + ], queue_size=10, slop=0.1) self.sync.registerCallback(self.synchronized_callback) def synchronized_callback(self, image_msg, lidar_msg): @@ -165,10 +165,9 @@ def synchronized_callback(self, image_msg, lidar_msg): lidar_down = self.latest_lidar.copy() boxes = BoundingBoxArray() - boxes.header.frame_id = 'velodyne' + boxes.header.frame_id = 'currentVehicleFrame' boxes.header.stamp = lidar_msg.header.stamp - pointpillars_detections = [] with torch.no_grad(): # Convert to tensor and format for PointPillars lidar_tensor = torch.from_numpy(lidar_down).float() @@ -195,20 +194,6 @@ def synchronized_callback(self, image_msg, lidar_msg): # Extract center position and dimensions x, y, z, l, w, h, yaw = bbox - if self.debug: - print("X LIDAR") - print(x) - print("L") - print(l) - print("Y LIDAR") - print(y) - print("W") - print(w) - print("Z LIDAR") - print(z) - print("H") - print(h) - # Transform from LiDAR to vehicle coordinates center_lidar = np.array([x, y, z, 1.0]) center_vehicle = self.T_l2v @ center_lidar @@ -221,7 +206,7 @@ def synchronized_callback(self, image_msg, lidar_msg): # Create a ROS BoundingBox message box = BoundingBox() - box.header.frame_id = 'velodyne' + box.header.frame_id = 'currentVehicleFrame' box.header.stamp = lidar_msg.header.stamp # if self.debug: @@ -261,19 +246,7 @@ def synchronized_callback(self, image_msg, lidar_msg): boxes.boxes.append(box) - # Also store 
detection info in the list if needed for other processing - pointpillars_detections.append({ - 'pose': { - 'position': [x, y, z], - 'orientation': [quat[0], quat[1], quat[2], quat[3]] - }, - 'dimensions': [l, w, h], - 'score': float(score) - }) - rospy.loginfo(f"Pedestrian detected at ({x:.2f}, {y:.2f}, {z:.2f}) with score {score:.2f}") - - # Save point_pillars_detections here for later if needed # Publish the bounding boxes rospy.loginfo(f"Publishing {len(boxes.boxes)} pedestrian bounding boxes") diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index fa92c4251..c6eb6b6d9 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -25,18 +25,6 @@ from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray from geometry_msgs.msg import Pose, Vector3 -def cylindrical_roi(points, center, radius, height): - horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) - vertical_diff = np.abs(points[:, 2] - center[2]) - mask = (horizontal_dist <= radius) & (vertical_diff <= height / 2) - return points[mask] - - -def filter_points_within_threshold(points, threshold=15.0): - distances = np.linalg.norm(points, axis=1) - mask = distances <= threshold - return points[mask] - def extract_roi_box(lidar_pc, center, half_extents): """ @@ -47,7 +35,6 @@ def extract_roi_box(lidar_pc, center, half_extents): mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) return lidar_pc[mask] - def pc2_to_numpy(pc2_msg, want_rgb=False): """ Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. @@ -55,12 +42,12 @@ def pc2_to_numpy(pc2_msg, want_rgb=False): """ # Convert the ROS message to a numpy structured array pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Stack x,y,z fields to a (N,3) array + # Convert each field to a 1D array and stack along axis 1 to get (N, 3) pts = np.stack((np.array(pc['x']).ravel(), np.array(pc['y']).ravel(), np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z in a specified range) - mask = (pts[:, 0] > -0.5) & (pts[:, 2] < -1) & (pts[:, 2] > -2.7) + # Apply filtering (for example, x > 0 and z < 2.5) + mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) return pts[mask] @@ -146,28 +133,21 @@ def create_ray_line_set(start, end): line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) return line_set - def downsample_points(lidar_points, voxel_size=0.15): pcd = o3d.geometry.PointCloud() pcd.points = o3d.utility.Vector3dVector(lidar_points) down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) return np.asarray(down_pcd.points) +def filter_depth_points(lidar_points, max_human_depth=0.9): -def filter_depth_points(lidar_points, max_depth_diff=0.9, use_norm=True): if lidar_points.shape[0] == 0: return lidar_points - - if use_norm: - depths = np.linalg.norm(lidar_points, axis=1) - else: - depths = lidar_points[:, 0] - - min_depth = np.min(depths) - max_possible_depth = min_depth + max_depth_diff - mask = depths < max_possible_depth - return lidar_points[mask] - + lidar_points_dist = lidar_points[:, 0] + min_dist = np.min(lidar_points_dist) + max_possible_dist = min_dist + max_human_depth + filtered_array = lidar_points[lidar_points_dist < max_possible_dist] + return filtered_array def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): """ @@ -182,20 +162,20 @@ def visualize_geometries(geometries, window_name="Open3D", width=800, height=600 vis.run() vis.destroy_window() - def 
pose_to_matrix(pose): """ Compose a 4x4 transformation matrix from a pose state. Assumes pose has attributes: x, y, z, yaw, pitch, roll, where the angles are given in degrees. """ + # Use default values if any are None (e.g. if the car is not moving) x = pose.x if pose.x is not None else 0.0 y = pose.y if pose.y is not None else 0.0 z = pose.z if pose.z is not None else 0.0 if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = pose.yaw - pitch = pose.pitch - roll = pose.roll + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) else: yaw = 0.0 pitch = 0.0 @@ -213,7 +193,6 @@ def transform_points_l2c(lidar_points, T_l2c): pts_cam = (T_l2c @ pts_hom.T).T # (N,4) return pts_cam[:, :3] - # ----- New: Vectorized projection function ----- def project_points(pts_cam, K, original_lidar_points): """ @@ -319,7 +298,7 @@ def initialize(self): self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ self.rgb_sub, self.lidar_sub - ], queue_size=500, slop=0.05) + ], queue_size=10, slop=0.1) self.sync.registerCallback(self.synchronized_callback) def synchronized_callback(self, image_msg, lidar_msg): @@ -334,9 +313,6 @@ def synchronized_callback(self, image_msg, lidar_msg): # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: return {} - else: - print(type(self.latest_image)) - print(type(self.latest_lidar)) lastest_image = self.latest_image.copy() downsample = False @@ -364,7 +340,7 @@ def synchronized_callback(self, image_msg, lidar_msg): undistorted_img = lastest_image.copy() orig_H, orig_W = lastest_image.shape[:2] self.current_K = self.K - results_normal = self.detector(img_normal, conf=0.35, classes=[0]) + results_normal = self.detector(img_normal, conf=0.4, classes=[0]) combined_boxes = [] boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] @@ -384,11 +360,10 @@ def synchronized_callback(self, image_msg, lidar_msg): # Create empty list of bounding boxes to fill and publish later boxes = BoundingBoxArray() - boxes.header.frame_id = 'velodyne' + boxes.header.frame_id = 'currentVehicleFrame' boxes.header.stamp = lidar_msg.header.stamp # Process YOLO detections - yolo_detections = [] boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else [] @@ -415,57 +390,46 @@ def synchronized_callback(self, image_msg, lidar_msg): # Get the 3D points corresponding to the box points_3d = roi_pts[:, 2:5] - - # Filter points to get a cleaner cluster - points_3d = filter_points_within_threshold(points_3d, 40) - points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) - - if points_3d.shape[0] < 4: + points_3d = filter_depth_points(points_3d, max_human_depth=0.8) + refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10) + refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.03) + + if refined_cluster.shape[0] < 5: continue # Create a point cloud from the filtered points pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(points_3d) + pcd.points = o3d.utility.Vector3dVector(refined_cluster) # Get an oriented bounding box obb = pcd.get_oriented_bounding_box() - center = obb.center + refined_center = obb.center dims 
= tuple(obb.extent) R_lidar = obb.R.copy() - - if self.debug: - print("X") - print(center[0]) - print("L") - print(dims[0]) - print("Y") - print(center[1]) - print("W") - print(dims[1]) - print("Z") - print(center[2]) - print("H") - print(dims[2]) + # We are assuming that dims[0] is height and dims[2] is length of obb.extent + # Transform from LiDAR to vehicle coordinates - center_hom = np.append(center, 1) - center_vehicle_hom = self.T_l2v @ center_hom - center_vehicle = center_vehicle_hom[:3] + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] # Calculate rotation in vehicle frame R_vehicle = self.T_l2v[:3, :3] @ R_lidar - yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + # yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0]) + + refined_center = refined_center_vehicle # Create a ROS BoundingBox message box_msg = BoundingBox() - box_msg.header.frame_id = 'velodyne' + box_msg.header.frame_id = 'currentVehicleFrame' box_msg.header.stamp = lidar_msg.header.stamp # Set the pose - box_msg.pose.position.x = float(center_vehicle[0]) - box_msg.pose.position.y = float(center_vehicle[1]) - box_msg.pose.position.z = float(center_vehicle[2]) + box_msg.pose.position.x = float(refined_center_vehicle[0]) + box_msg.pose.position.y = float(refined_center_vehicle[1]) + box_msg.pose.position.z = float(refined_center_vehicle[2]) # Convert yaw to quaternion quat = R.from_euler('z', yaw).as_quat() @@ -475,21 +439,22 @@ def synchronized_callback(self, image_msg, lidar_msg): box_msg.pose.orientation.w = float(quat[3]) # Set the dimensions - box_msg.dimensions.x = float(dims[0]) # length + # Swapped dims[2] and dims[0] + box_msg.dimensions.x = float(dims[2]) # length box_msg.dimensions.y = float(dims[1]) # width - box_msg.dimensions.z = float(dims[2]) # height + box_msg.dimensions.z = float(dims[0]) # height # if self.debug: # print("X") - # print(center_vehicle[0]) + # print(refined_center_vehicle[0]) # print("L") # print(dims[0]) # print("Y") - # print(center_vehicle[1]) + # print(refined_center_vehicle[1]) # print("W") # print(dims[1]) # print("Z") - # print(center_vehicle[2]) + # print(refined_center_vehicle[2]) # print("H") # print(dims[2]) @@ -499,17 +464,7 @@ def synchronized_callback(self, image_msg, lidar_msg): boxes.boxes.append(box_msg) - # Store detection info - yolo_detections.append({ - 'pose': { - 'position': center_vehicle, - 'orientation': [quat[0], quat[1], quat[2], quat[3]] - }, - 'dimensions': dims, - 'score': float(conf_scores[i]) - }) - - rospy.loginfo(f"Person detected at ({center_vehicle[0]:.2f}, {center_vehicle[1]:.2f}, {center_vehicle[2]:.2f}) with score {conf_scores[i]:.2f}") + rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, {refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) with score {conf_scores[i]:.2f}") # Publish the bounding boxes rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes") From da235ed030f23f2d2baa61501e6c5305126fc013 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 16:24:35 -0500 Subject: [PATCH 32/55] Moved shared pedestrian helper functions to utility files. Fixed bounding box orientation issues for yolo and pointpillars files. 
Updated README with visualization instructions --- GEMstack/onboard/perception/README.md | 2 + .../onboard/perception/combined_detection.py | 17 +- .../perception/pedestrian_detection.py | 252 +----------------- .../onboard/perception/pedestrian_utils.py | 215 +++++++++++++++ .../perception/pedestrian_utils_gem.py | 34 +++ .../onboard/perception/point_pillars_node.py | 2 +- GEMstack/onboard/perception/yolo_node.py | 204 +------------- 7 files changed, 266 insertions(+), 460 deletions(-) create mode 100644 GEMstack/onboard/perception/pedestrian_utils.py create mode 100644 GEMstack/onboard/perception/pedestrian_utils_gem.py diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md index 19dfca655..58c914fad 100644 --- a/GEMstack/onboard/perception/README.md +++ b/GEMstack/onboard/perception/README.md @@ -79,3 +79,5 @@ $ source ~/catkin_ws/devel/setup.bash $ rviz 2. Publish a static transform from the map to visualize the published bounding box data: $ rosrun tf2_ros static_transform_publisher 0 0 0 0 0 0 map currentVehicleFrame +3. In Rviz, click "add" in the bottom left corner. In "By display type", under "jsk_rviz_plugins" select BoundingBoxArray. +4. Expand BoundingBoxArray on the left. Under it you will see "Topic" with a blank space to the right of it. Click the blank space (it's a hidden drop down box) and select the BoundingBoxArray topic to visualize \ No newline at end of file diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 3db197b09..1e438f7d7 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,7 +1,8 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -from .perception_utils import * +# from .perception_utils import * +from .pedestrian_utils_gem import * from typing import Dict import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer @@ -15,7 +16,6 @@ from .sensorFusion.eval_3d_bbox_performance import calculate_3d_iou - def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: # TODO: merging # Heuristics- Average pose @@ -188,12 +188,10 @@ def _fuse_bounding_boxes(self, fused_boxes_list.append(pp_box) rospy.logdebug(f"Kept unmatched PP box {j}") - if self.debug: - # Work in progress to visualize combined results - fused_array = BoundingBoxArray() - fused_array.header = yolo_bbx_array.header - fused_array.boxes = fused_boxes_list - self.pub_fused.publish(fused_array) + # Work in progress to visualize combined results + fused_bb_array = BoundingBoxArray() + fused_bb_array.header = original_header + fused_bb_array.boxes = fused_boxes_list for i, box in enumerate(fused_boxes_list): try: @@ -225,7 +223,7 @@ def _fuse_bounding_boxes(self, # temp id # _update_tracking assign persistent IDs - temp_agent_id = f"FrameDet_{i}" + temp_agent_id = f"pedestrian{i}" current_agents_in_frame[temp_agent_id] = AgentState( pose=final_pose, dimensions=dims, outline=None, type=agent_type, @@ -236,6 +234,7 @@ def _fuse_bounding_boxes(self, rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") continue + self.pub_fused.publish(fused_bb_array) return current_agents_in_frame diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index 4efe85bca..d9a535852 100644 --- 
a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -2,261 +2,16 @@ from ..interface.gem import GEMInterface from ..component import Component from ultralytics import YOLO -import cv2 from typing import Dict -import open3d as o3d -import numpy as np from sklearn.cluster import DBSCAN from scipy.spatial.transform import Rotation as R import rospy from sensor_msgs.msg import PointCloud2, Image -import sensor_msgs.point_cloud2 as pc2 -import struct, ctypes from message_filters import Subscriber, ApproximateTimeSynchronizer from cv_bridge import CvBridge import time -import math -import ros_numpy - - -# ----- Helper Functions ----- - -def match_existing_pedestrian( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing pedestrian agent within a specified distance threshold. - """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) - - -def extract_roi_box(lidar_pc, center, half_extents): - """ - Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. - """ - lower = center - half_extents - upper = center + half_extents - mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) - return lidar_pc[mask] - - -# def pc2_to_numpy(pc2_msg, want_rgb=False): -# """ -# Convert a ROS PointCloud2 message into a numpy array. -# This function extracts the x, y, z coordinates from the point cloud. -# """ -# start = time.time() -# gen = pc2.read_points(pc2_msg, skip_nans=True) -# end = time.time() -# print('Read lidar points: ', end - start) -# start = time.time() -# pts = np.array(list(gen), dtype=np.float16) -# pts = pts[:, :3] # Only x, y, z coordinates -# mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) -# end = time.time() -# print('Convert to numpy: ', end - start) -# return pts[mask] - -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. - """ - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Convert each field to a 1D array and stack along axis 1 to get (N, 3) - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z < 2.5) - mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) - return pts[mask] - - - -def backproject_pixel(u, v, K): - """ - Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. 
- """ - cx, cy = K[0, 2], K[1, 2] - fx, fy = K[0, 0], K[1, 1] - x = (u - cx) / fx - y = (v - cy) / fy - ray_dir = np.array([x, y, 1.0]) - return ray_dir / np.linalg.norm(ray_dir) - - -def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, - t_min, t_max, t_step, - distance_threshold, min_points, ransac_threshold): - """ - Identify the center of a human along a projected ray. - (This function is no longer used in the new approach.) - """ - return None, None, None - - -def extract_roi(pc, center, roi_radius): - """ - Extract points from a point cloud that lie within a specified radius of a center point. - """ - distances = np.linalg.norm(pc - center, axis=1) - return pc[distances < roi_radius] - - -def refine_cluster(roi_points, center, eps=0.2, min_samples=10): - """ - Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. - """ - if roi_points.shape[0] < min_samples: - return roi_points - clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) - labels = clustering.labels_ - valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] - if not valid_clusters: - return roi_points - best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) - return best_cluster - - -def remove_ground_by_min_range(cluster, z_range=0.05): - """ - Remove points within z_range of the minimum z (assumed to be ground). - """ - if cluster is None or cluster.shape[0] == 0: - return cluster - min_z = np.min(cluster[:, 2]) - filtered = cluster[cluster[:, 2] > (min_z + z_range)] - return filtered - - -def get_bounding_box_center_and_dimensions(points): - """ - Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. - """ - if points.shape[0] == 0: - return None, None - min_vals = np.min(points, axis=0) - max_vals = np.max(points, axis=0) - center = (min_vals + max_vals) / 2 - dimensions = max_vals - min_vals - return center, dimensions - - -def create_ray_line_set(start, end): - """ - Create an Open3D LineSet object representing a ray between two 3D points. - The line is colored yellow. - """ - points = [start, end] - lines = [[0, 1]] - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) - return line_set - -def downsample_points(lidar_points, voxel_size=0.15): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - return np.asarray(down_pcd.points) - -def filter_depth_points(lidar_points, max_human_depth=0.9): - - if lidar_points.shape[0] == 0: - return lidar_points - lidar_points_dist = lidar_points[:, 0] - min_dist = np.min(lidar_points_dist) - max_possible_dist = min_dist + max_human_depth - filtered_array = lidar_points[lidar_points_dist < max_possible_dist] - return filtered_array - -def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): - """ - Visualize a list of Open3D geometry objects in a dedicated window. - """ - vis = o3d.visualization.Visualizer() - vis.create_window(window_name=window_name, width=width, height=height) - for geom in geometries: - vis.add_geometry(geom) - opt = vis.get_render_option() - opt.point_size = point_size - vis.run() - vis.destroy_window() - -def pose_to_matrix(pose): - """ - Compose a 4x4 transformation matrix from a pose state. 
- Assumes pose has attributes: x, y, z, yaw, pitch, roll, - where the angles are given in degrees. - """ - # Use default values if any are None (e.g. if the car is not moving) - x = pose.x if pose.x is not None else 0.0 - y = pose.y if pose.y is not None else 0.0 - z = pose.z if pose.z is not None else 0.0 - if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = math.radians(pose.yaw) - pitch = math.radians(pose.pitch) - roll = math.radians(pose.roll) - else: - yaw = 0.0 - pitch = 0.0 - roll = 0.0 - R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() - T = np.eye(4) - T[:3, :3] = R_mat - T[:3, 3] = np.array([x, y, z]) - return T - - -def transform_points_l2c(lidar_points, T_l2c): - N = lidar_points.shape[0] - pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) - pts_cam = (T_l2c @ pts_hom.T).T # (N,4) - return pts_cam[:, :3] - -# ----- New: Vectorized projection function ----- -def project_points(pts_cam, K, original_lidar_points): - """ - Vectorized version. - pts_cam: (N,3) array of points in camera coordinates. - original_lidar_points: (N,3) array of points in LiDAR coordinates. - Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. - """ - mask = pts_cam[:, 2] > 0 - pts_cam_valid = pts_cam[mask] - lidar_valid = original_lidar_points[mask] - Xc = pts_cam_valid[:, 0] - Yc = pts_cam_valid[:, 1] - Zc = pts_cam_valid[:, 2] - u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) - v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) - proj = np.column_stack((u, v, lidar_valid)) - return proj +from .pedestrian_utils import * # Import the moved helper functions +from .pedestrian_utils_gem import * # Import the moved GEM related helper functions # ----- Pedestrian Detector 2D (New Approach) ----- @@ -302,7 +57,8 @@ def initialize(self): self.sync = ApproximateTimeSynchronizer([self.rgb_sub, self.lidar_sub], queue_size=10, slop=0.1) self.sync.registerCallback(self.synchronized_callback) - self.detector = YOLO('../../knowledge/detection/yolov8s.pt') + # self.detector = YOLO('../../knowledge/detection/yolov8s.pt') + self.detector = YOLO('GEMstack/knowledge/detection/yolov8s.pt') self.detector.to('cuda') self.K = np.array([[684.83331299, 0., 573.37109375], [0., 684.60968018, 363.70092773], diff --git a/GEMstack/onboard/perception/pedestrian_utils.py b/GEMstack/onboard/perception/pedestrian_utils.py new file mode 100644 index 000000000..7a8befb22 --- /dev/null +++ b/GEMstack/onboard/perception/pedestrian_utils.py @@ -0,0 +1,215 @@ +from typing import Dict +import open3d as o3d +import numpy as np +from sklearn.cluster import DBSCAN +from scipy.spatial.transform import Rotation as R +from sensor_msgs.msg import PointCloud2 +import math +import ros_numpy + +# ----- Pedestrian Detection Helper Functions ----- + +def extract_roi_box(lidar_pc, center, half_extents): + """ + Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. + """ + lower = center - half_extents + upper = center + half_extents + mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) + return lidar_pc[mask] + + +# def pc2_to_numpy(pc2_msg, want_rgb=False): +# """ +# Convert a ROS PointCloud2 message into a numpy array. +# This function extracts the x, y, z coordinates from the point cloud. 
+# """ +# start = time.time() +# gen = pc2.read_points(pc2_msg, skip_nans=True) +# end = time.time() +# print('Read lidar points: ', end - start) +# start = time.time() +# pts = np.array(list(gen), dtype=np.float16) +# pts = pts[:, :3] # Only x, y, z coordinates +# mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) +# end = time.time() +# print('Convert to numpy: ', end - start) +# return pts[mask] + +def pc2_to_numpy(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates from the point cloud. + """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + # Convert each field to a 1D array and stack along axis 1 to get (N, 3) + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel()), axis=1) + # Apply filtering (for example, x > 0 and z < 2.5) + mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) + return pts[mask] + + + +def backproject_pixel(u, v, K): + """ + Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. + """ + cx, cy = K[0, 2], K[1, 2] + fx, fy = K[0, 0], K[1, 1] + x = (u - cx) / fx + y = (v - cy) / fy + ray_dir = np.array([x, y, 1.0]) + return ray_dir / np.linalg.norm(ray_dir) + + +def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, + t_min, t_max, t_step, + distance_threshold, min_points, ransac_threshold): + """ + Identify the center of a human along a projected ray. + (This function is no longer used in the new approach.) + """ + return None, None, None + + +def extract_roi(pc, center, roi_radius): + """ + Extract points from a point cloud that lie within a specified radius of a center point. + """ + distances = np.linalg.norm(pc - center, axis=1) + return pc[distances < roi_radius] + + +def refine_cluster(roi_points, center, eps=0.2, min_samples=10): + """ + Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. + """ + if roi_points.shape[0] < min_samples: + return roi_points + clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) + labels = clustering.labels_ + valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] + if not valid_clusters: + return roi_points + best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) + return best_cluster + + +def remove_ground_by_min_range(cluster, z_range=0.05): + """ + Remove points within z_range of the minimum z (assumed to be ground). + """ + if cluster is None or cluster.shape[0] == 0: + return cluster + min_z = np.min(cluster[:, 2]) + filtered = cluster[cluster[:, 2] > (min_z + z_range)] + return filtered + + +def get_bounding_box_center_and_dimensions(points): + """ + Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. + """ + if points.shape[0] == 0: + return None, None + min_vals = np.min(points, axis=0) + max_vals = np.max(points, axis=0) + center = (min_vals + max_vals) / 2 + dimensions = max_vals - min_vals + return center, dimensions + + +def create_ray_line_set(start, end): + """ + Create an Open3D LineSet object representing a ray between two 3D points. + The line is colored yellow. 
+ """ + points = [start, end] + lines = [[0, 1]] + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) + return line_set + +def downsample_points(lidar_points, voxel_size=0.15): + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(lidar_points) + down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) + return np.asarray(down_pcd.points) + +def filter_depth_points(lidar_points, max_human_depth=0.9): + + if lidar_points.shape[0] == 0: + return lidar_points + lidar_points_dist = lidar_points[:, 0] + min_dist = np.min(lidar_points_dist) + max_possible_dist = min_dist + max_human_depth + filtered_array = lidar_points[lidar_points_dist < max_possible_dist] + return filtered_array + +def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): + """ + Visualize a list of Open3D geometry objects in a dedicated window. + """ + vis = o3d.visualization.Visualizer() + vis.create_window(window_name=window_name, width=width, height=height) + for geom in geometries: + vis.add_geometry(geom) + opt = vis.get_render_option() + opt.point_size = point_size + vis.run() + vis.destroy_window() + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + # Use default values if any are None (e.g. if the car is not moving) + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T + + +def transform_points_l2c(lidar_points, T_l2c): + N = lidar_points.shape[0] + pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) + pts_cam = (T_l2c @ pts_hom.T).T # (N,4) + return pts_cam[:, :3] + +# ----- New: Vectorized projection function ----- +def project_points(pts_cam, K, original_lidar_points): + """ + Vectorized version. + pts_cam: (N,3) array of points in camera coordinates. + original_lidar_points: (N,3) array of points in LiDAR coordinates. + Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
+ """ + mask = pts_cam[:, 2] > 0 + pts_cam_valid = pts_cam[mask] + lidar_valid = original_lidar_points[mask] + Xc = pts_cam_valid[:, 0] + Yc = pts_cam_valid[:, 1] + Zc = pts_cam_valid[:, 2] + u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) + v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) + proj = np.column_stack((u, v, lidar_valid)) + return proj \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_utils_gem.py b/GEMstack/onboard/perception/pedestrian_utils_gem.py new file mode 100644 index 000000000..6a313e27b --- /dev/null +++ b/GEMstack/onboard/perception/pedestrian_utils_gem.py @@ -0,0 +1,34 @@ +from ...state import ObjectPose, AgentState +from typing import Dict +import numpy as np + +def match_existing_pedestrian( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing pedestrian agent within a specified distance threshold. + """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) \ No newline at end of file diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 9ebd9e40d..db5aad1c1 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -229,7 +229,7 @@ def synchronized_callback(self, image_msg, lidar_msg): box.pose.position.z = float(z_vehicle) # Convert yaw to quaternion - quat = R.from_euler('z', yaw).as_quat() + quat = R.from_euler('z', vehicle_yaw).as_quat() box.pose.orientation.x = float(quat[0]) box.pose.orientation.y = float(quat[1]) box.pose.orientation.z = float(quat[2]) diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index c6eb6b6d9..81830d423 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -1,218 +1,18 @@ -# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum -# from ..interface.gem import GEMInterface -# from ..component import Component -# from perception_utils import * +from pedestrian_utils import * from ultralytics import YOLO import cv2 -from typing import Dict -import open3d as o3d -import numpy as np from scipy.spatial.transform import Rotation as R import rospy from sensor_msgs.msg import PointCloud2, Image from message_filters import Subscriber, ApproximateTimeSynchronizer from cv_bridge import CvBridge import time -import os -import yaml -import ros_numpy - - -from sklearn.cluster import DBSCAN -import sensor_msgs.point_cloud2 as pc2 # Publisher imports: from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray from geometry_msgs.msg import Pose, Vector3 -def extract_roi_box(lidar_pc, center, half_extents): - """ - Extract a region of interest (ROI) from the LiDAR point 
cloud defined by an axis-aligned bounding box. - """ - lower = center - half_extents - upper = center + half_extents - mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) - return lidar_pc[mask] - -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. - """ - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Convert each field to a 1D array and stack along axis 1 to get (N, 3) - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z < 2.5) - mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) - return pts[mask] - - -def backproject_pixel(u, v, K): - """ - Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. - """ - cx, cy = K[0, 2], K[1, 2] - fx, fy = K[0, 0], K[1, 1] - x = (u - cx) / fx - y = (v - cy) / fy - ray_dir = np.array([x, y, 1.0]) - return ray_dir / np.linalg.norm(ray_dir) - - -def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, - t_min, t_max, t_step, - distance_threshold, min_points, ransac_threshold): - """ - Identify the center of a human along a projected ray. - (This function is no longer used in the new approach.) - """ - return None, None, None - - -def extract_roi(pc, center, roi_radius): - """ - Extract points from a point cloud that lie within a specified radius of a center point. - """ - distances = np.linalg.norm(pc - center, axis=1) - return pc[distances < roi_radius] - - -def refine_cluster(roi_points, center, eps=0.2, min_samples=10): - """ - Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. - """ - if roi_points.shape[0] < min_samples: - return roi_points - clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) - labels = clustering.labels_ - valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] - if not valid_clusters: - return roi_points - best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) - return best_cluster - - -def remove_ground_by_min_range(cluster, z_range=0.05): - """ - Remove points within z_range of the minimum z (assumed to be ground). - """ - if cluster is None or cluster.shape[0] == 0: - return cluster - min_z = np.min(cluster[:, 2]) - filtered = cluster[cluster[:, 2] > (min_z + z_range)] - return filtered - - -def get_bounding_box_center_and_dimensions(points): - """ - Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. - """ - if points.shape[0] == 0: - return None, None - min_vals = np.min(points, axis=0) - max_vals = np.max(points, axis=0) - center = (min_vals + max_vals) / 2 - dimensions = max_vals - min_vals - return center, dimensions - - -def create_ray_line_set(start, end): - """ - Create an Open3D LineSet object representing a ray between two 3D points. - The line is colored yellow. 
- """ - points = [start, end] - lines = [[0, 1]] - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) - return line_set - -def downsample_points(lidar_points, voxel_size=0.15): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - return np.asarray(down_pcd.points) - -def filter_depth_points(lidar_points, max_human_depth=0.9): - - if lidar_points.shape[0] == 0: - return lidar_points - lidar_points_dist = lidar_points[:, 0] - min_dist = np.min(lidar_points_dist) - max_possible_dist = min_dist + max_human_depth - filtered_array = lidar_points[lidar_points_dist < max_possible_dist] - return filtered_array - -def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): - """ - Visualize a list of Open3D geometry objects in a dedicated window. - """ - vis = o3d.visualization.Visualizer() - vis.create_window(window_name=window_name, width=width, height=height) - for geom in geometries: - vis.add_geometry(geom) - opt = vis.get_render_option() - opt.point_size = point_size - vis.run() - vis.destroy_window() - -def pose_to_matrix(pose): - """ - Compose a 4x4 transformation matrix from a pose state. - Assumes pose has attributes: x, y, z, yaw, pitch, roll, - where the angles are given in degrees. - """ - # Use default values if any are None (e.g. if the car is not moving) - x = pose.x if pose.x is not None else 0.0 - y = pose.y if pose.y is not None else 0.0 - z = pose.z if pose.z is not None else 0.0 - if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = math.radians(pose.yaw) - pitch = math.radians(pose.pitch) - roll = math.radians(pose.roll) - else: - yaw = 0.0 - pitch = 0.0 - roll = 0.0 - R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() - T = np.eye(4) - T[:3, :3] = R_mat - T[:3, 3] = np.array([x, y, z]) - return T - - -def transform_points_l2c(lidar_points, T_l2c): - N = lidar_points.shape[0] - pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) - pts_cam = (T_l2c @ pts_hom.T).T # (N,4) - return pts_cam[:, :3] - -# ----- New: Vectorized projection function ----- -def project_points(pts_cam, K, original_lidar_points): - """ - Vectorized version. - pts_cam: (N,3) array of points in camera coordinates. - original_lidar_points: (N,3) array of points in LiDAR coordinates. - Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
- """ - mask = pts_cam[:, 2] > 0 - pts_cam_valid = pts_cam[mask] - lidar_valid = original_lidar_points[mask] - Xc = pts_cam_valid[:, 0] - Yc = pts_cam_valid[:, 1] - Zc = pts_cam_valid[:, 2] - u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) - v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) - proj = np.column_stack((u, v, lidar_valid)) - return proj - - class YoloNode(): """ Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud @@ -392,7 +192,7 @@ def synchronized_callback(self, image_msg, lidar_msg): points_3d = roi_pts[:, 2:5] points_3d = filter_depth_points(points_3d, max_human_depth=0.8) refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10) - refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.03) + refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1) if refined_cluster.shape[0] < 5: continue From 27f612e00761c9143e016db3afae8a3923a4cb9c Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 17:01:28 -0500 Subject: [PATCH 33/55] Cleaned up bounding box creation process for YOLO node and moved to another file. Added rest of publishing code to combined_detection.py --- .../onboard/perception/combined_detection.py | 2 + .../perception/combined_detection_utils.py | 36 +++++++++++ GEMstack/onboard/perception/yolo_node.py | 60 +++++-------------- 3 files changed, 54 insertions(+), 44 deletions(-) create mode 100644 GEMstack/onboard/perception/combined_detection_utils.py diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 1e438f7d7..7dc8a8b81 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -195,6 +195,8 @@ def _fuse_bounding_boxes(self, for i, box in enumerate(fused_boxes_list): try: + fuxed_bb_array.boxes.append(box) + # Cur vehicle frame pos_x = box.pose.position.x; pos_y = box.pose.position.y; pos_z = box.pose.position.z quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w diff --git a/GEMstack/onboard/perception/combined_detection_utils.py b/GEMstack/onboard/perception/combined_detection_utils.py new file mode 100644 index 000000000..4c3910ef6 --- /dev/null +++ b/GEMstack/onboard/perception/combined_detection_utils.py @@ -0,0 +1,36 @@ +from jsk_recognition_msgs.msg import BoundingBox +from scipy.spatial.transform import Rotation as R + +# Seperated Sensor Fusion specific utilities into it's own file because package imports are required +# (to minimize integration problems) + +# Adds a new bounding box to a BoundingBoxArray +def add_bounding_box(boxes, frame_id, stamp, x, y, z, l, w, h, yaw, conf_score, label): + box_msg = BoundingBox() + box_msg.header.frame_id = frame_id + box_msg.header.stamp = stamp + + # Set the pose + box_msg.pose.position.x = float(x) + box_msg.pose.position.y = float(y) + box_msg.pose.position.z = float(z) + + # Convert yaw to quaternion + quat = R.from_euler('z', yaw).as_quat() + box_msg.pose.orientation.x = float(quat[0]) + box_msg.pose.orientation.y = float(quat[1]) + box_msg.pose.orientation.z = float(quat[2]) + box_msg.pose.orientation.w = float(quat[3]) + + # Set the dimensions + # Swapped dims[2] and dims[0] + box_msg.dimensions.x = float(l) # length + box_msg.dimensions.y = float(w) # width + box_msg.dimensions.z = float(h) # height + + # Add confidence score and label + box_msg.value = float(conf_score) + box_msg.label = 
label + + boxes.boxes.append(box_msg) + return boxes diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 81830d423..b45a47a04 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -1,4 +1,5 @@ from pedestrian_utils import * +from combined_detection_utils import add_bounding_box from ultralytics import YOLO import cv2 from scipy.spatial.transform import Rotation as R @@ -9,8 +10,8 @@ import time # Publisher imports: -from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray -from geometry_msgs.msg import Pose, Vector3 +from jsk_recognition_msgs.msg import BoundingBoxArray +# from geometry_msgs.msg import Pose, Vector3 class YoloNode(): @@ -220,49 +221,20 @@ def synchronized_callback(self, image_msg, lidar_msg): yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0]) refined_center = refined_center_vehicle - - # Create a ROS BoundingBox message - box_msg = BoundingBox() - box_msg.header.frame_id = 'currentVehicleFrame' - box_msg.header.stamp = lidar_msg.header.stamp - - # Set the pose - box_msg.pose.position.x = float(refined_center_vehicle[0]) - box_msg.pose.position.y = float(refined_center_vehicle[1]) - box_msg.pose.position.z = float(refined_center_vehicle[2]) - - # Convert yaw to quaternion - quat = R.from_euler('z', yaw).as_quat() - box_msg.pose.orientation.x = float(quat[0]) - box_msg.pose.orientation.y = float(quat[1]) - box_msg.pose.orientation.z = float(quat[2]) - box_msg.pose.orientation.w = float(quat[3]) - - # Set the dimensions - # Swapped dims[2] and dims[0] - box_msg.dimensions.x = float(dims[2]) # length - box_msg.dimensions.y = float(dims[1]) # width - box_msg.dimensions.z = float(dims[0]) # height - # if self.debug: - # print("X") - # print(refined_center_vehicle[0]) - # print("L") - # print(dims[0]) - # print("Y") - # print(refined_center_vehicle[1]) - # print("W") - # print(dims[1]) - # print("Z") - # print(refined_center_vehicle[2]) - # print("H") - # print(dims[2]) - - # Add confidence score and label - box_msg.value = float(conf_scores[i]) - box_msg.label = 0 # person/pedestrian class - - boxes.boxes.append(box_msg) + boxes = add_bounding_box(boxes=boxes, + frame_id='currentVehicleFrame', + stamp=lidar_msg.header.stamp, + x=refined_center_vehicle[0], + y=refined_center_vehicle[1], + z=refined_center_vehicle[2], + l=dims[2], # length + w=dims[1], # width + h=dims[0], # height + yaw=yaw, + conf_score=float(conf_scores[i]), + label=0 # person/pedestrian class + ) rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, {refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) with score {conf_scores[i]:.2f}") From 72477b2f545e0f3727903d054ad3d08cf987e0f8 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 17:25:52 -0500 Subject: [PATCH 34/55] Added support for combined_detection_utils.py helper functions in point_pillars node --- .../onboard/perception/build_point_pillars.sh | 8 ++- .../onboard/perception/point_pillars_node.py | 55 +++++-------------- .../perception/setup/docker-compose.yaml | 3 +- 3 files changed, 23 insertions(+), 43 deletions(-) diff --git a/GEMstack/onboard/perception/build_point_pillars.sh b/GEMstack/onboard/perception/build_point_pillars.sh index c958ea836..829243f2f 100644 --- a/GEMstack/onboard/perception/build_point_pillars.sh +++ b/GEMstack/onboard/perception/build_point_pillars.sh @@ -1,12 +1,18 @@ #!/bin/bash -# Check if the point_pillars_node.py and model weights exist +# Check if the point_pillars_node.py, 
the helper functions file, and model weights exist if [ ! -f "point_pillars_node.py" ]; then echo "ERROR: point_pillars_node.py not found in the current directory!" echo "Please place your point_pillars_node.py file in the same directory as this script." exit 1 fi +if [ ! -f "combined_detection_utils.py" ]; then + echo "ERROR: combined_detection_utils.py not found in the current directory!" + echo "Please place your combined_detection_utils.py file in the same directory as this script." + exit 1 +fi + if [ ! -f "epoch_160.pth" ]; then echo "WARNING: epoch_160.pth model weights not found in the current directory!" echo "Please place your model weights file in the same directory as this script." diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index db5aad1c1..5ae609643 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -2,6 +2,7 @@ # from ..interface.gem import GEMInterface # from ..component import Component # from perception_utils import * +from combined_detection_utils import * import numpy as np from scipy.spatial.transform import Rotation as R import rospy @@ -204,47 +205,19 @@ def synchronized_callback(self, image_msg, lidar_msg): R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - # Create a ROS BoundingBox message - box = BoundingBox() - box.header.frame_id = 'currentVehicleFrame' - box.header.stamp = lidar_msg.header.stamp - - # if self.debug: - # print("X VEHICLE") - # print(x_vehicle) - # print("L") - # print(l) - # print("Y VEHICLE") - # print(y_vehicle) - # print("W") - # print(w) - # print("Z VEHICLE") - # print(z_vehicle) - # print("H") - # print(h) - - # Set the pose (position and orientation) - box.pose.position.x = float(x_vehicle) - box.pose.position.y = float(y_vehicle) - box.pose.position.z = float(z_vehicle) - - # Convert yaw to quaternion - quat = R.from_euler('z', vehicle_yaw).as_quat() - box.pose.orientation.x = float(quat[0]) - box.pose.orientation.y = float(quat[1]) - box.pose.orientation.z = float(quat[2]) - box.pose.orientation.w = float(quat[3]) - - # Set the dimensions - box.dimensions.x = float(l) # length - box.dimensions.y = float(w) # width - box.dimensions.z = float(h) # height - - # Add class label and confidence score: - box.value = float(score) - box.label = int(label) - - boxes.boxes.append(box) + boxes = add_bounding_box(boxes=boxes, + frame_id='currentVehicleFrame', + stamp=lidar_msg.header.stamp, + x=x_vehicle, + y=y_vehicle, + z=z_vehicle, + l=l, # length + w=w, # width + h=h, # height + yaw=vehicle_yaw, + conf_score=score, + label=label # person/pedestrian class + ) rospy.loginfo(f"Pedestrian detected at ({x:.2f}, {y:.2f}, {z:.2f}) with score {score:.2f}") diff --git a/GEMstack/onboard/perception/setup/docker-compose.yaml b/GEMstack/onboard/perception/setup/docker-compose.yaml index 50d95d193..d877104f2 100644 --- a/GEMstack/onboard/perception/setup/docker-compose.yaml +++ b/GEMstack/onboard/perception/setup/docker-compose.yaml @@ -14,8 +14,9 @@ services: stdin_open: true tty: true volumes: - # Mount the point_pillars_node.py file and model weights + # Mount the point_pillars_node.py file, shared helper functions, and model weights - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" + - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/combined_detection_utils.py:ro" - 
"../epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" # Mount host home directory (optional) - "~:/home/ppuser/host:ro" From d86916380446214b28869fa62d95eea119cc2056 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 17:31:19 -0500 Subject: [PATCH 35/55] Cleaned up unneeded camera matrices from point_pillars_node.py --- .../onboard/perception/point_pillars_node.py | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 5ae609643..421a5cb79 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -104,47 +104,13 @@ def initialize(self): checkpoint = torch.load(model_path) #, map_location='cuda' if torch.cuda.is_available() else 'cpu') self.pointpillars.load_state_dict(checkpoint) - # if torch.cuda.is_available(): - # self.pointpillars = self.pointpillars.cuda() - self.pointpillars.eval() rospy.loginfo("PointPillars model loaded successfully") - if self.camera_front: - self.K = np.array([[684.83331299, 0., 573.37109375], - [0., 684.60968018, 363.70092773], - [0., 0., 1.]]) - else: - self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], - [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], - [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) - - if self.camera_front: - self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) - else: - self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, - -6.19939758e-02]) - self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], [-0.02530848, 0.99965156, -0.00749882, 0.03773583], [-0.02379784, 0.00689664, 0.999693, 1.95320223], [0., 0., 0., 1.]]) - if self.camera_front: - self.T_l2c = np.array([ - [0.001090, -0.999489, -0.031941, 0.149698], - [-0.007664, 0.031932, -0.999461, -0.397813], - [0.999970, 0.001334, -0.007625, -0.691405], - [0., 0., 0., 1.000000] - ]) - else: - self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], - [-0.09720448, 0.13371206, -0.98624154, -0.1598301], - [0.68884317, -0.7061996, -0.16363744, -1.04767285], - [0., 0., 0., 1.]] - ) - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] - self.camera_origin_in_lidar = self.T_c2l[:3, 3] # Subscribe to the RGB and LiDAR streams self.rgb_sub = Subscriber(rgb_topic, Image) From aa630ec6729302df06f3bc28af3855faf64f3e14 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 19:34:02 -0500 Subject: [PATCH 36/55] Added orientation averaging, fixed a PP bbox height bug, fixed a combined plotting bug --- .../onboard/perception/combined_detection.py | 65 +++++++++++++++---- .../onboard/perception/point_pillars_node.py | 12 ++-- .../perception/setup/docker-compose.yaml | 2 +- GEMstack/onboard/perception/yolo_node.py | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 7dc8a8b81..a3cc5db42 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -13,8 +13,42 @@ import numpy as np from scipy.spatial.transform import Rotation as R from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Quaternion from .sensorFusion.eval_3d_bbox_performance import calculate_3d_iou +import copy + + +def average_yaw(yaw1, yaw2): + v1 = np.array([np.cos(yaw1), np.sin(yaw1)]) + v2 = 
np.array([np.cos(yaw2), np.sin(yaw2)]) + v_avg = (v1 + v2) / 2.0 + return np.arctan2(v_avg[1], v_avg[0]) + +def quaternion_to_yaw(quaternion_arr): + return R.from_quat(quaternion_arr).as_euler('zyx', degrees=False)[0] + +def avg_orientations(orientation1, orientation2): + # This function assumes both quaternions are 2D planar rotations around the z axis + + quat1 = [orientation1.x, orientation1.y, orientation1.z, orientation1.w] + quat2 = [orientation2.x, orientation2.y, orientation2.z, orientation2.w] + + yaw1 = quaternion_to_yaw(quaternion_arr=quat1) + yaw2 = quaternion_to_yaw(quaternion_arr=quat2) + + # Compute the average yaw and then convert back into quaternions + # (averaging quaternions directly causes problems) + avg_yaw = average_yaw(yaw1=yaw1, yaw2=yaw2) + avg_quat = R.from_euler('z', avg_yaw).as_quat() + + orientation = Quaternion() + orientation.x = float(avg_quat[0]) + orientation.y = float(avg_quat[1]) + orientation.z = float(avg_quat[2]) + orientation.w = float(avg_quat[3]) + + return orientation def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: # TODO: merging @@ -29,8 +63,10 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0 - # Avg orientation (quaternions) - merged_box.pose.orientation = box1.pose.orientation + + # Avg orientations (quaternions) + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0 @@ -76,8 +112,8 @@ def __init__( self.use_start_frame = use_start_frame self.iou_threshold = iou_threshold - self.yolo_topic = "/yolo_boxes" - self.pp_topic = "/pointpillars_boxes" + self.yolo_topic = '/yolo_boxes' + self.pp_topic = '/pointpillars_boxes' self.debug = False rospy.loginfo(f"CombinedDetector3D Initialized. 
Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.") @@ -114,8 +150,8 @@ def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: Bo def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: current_time = self.vehicle_interface.time() - yolo_bbx_array = self.latest_yolo_bbxs - pp_bbx_array = self.latest_pp_bbxs + yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs) + pp_bbx_array = copy.deepcopy(self.latest_pp_bbxs) if yolo_bbx_array is None or pp_bbx_array is None: return {} @@ -128,12 +164,13 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) - if self.enable_tracking: - self._update_tracking(current_frame_agents) - else: - self.tracked_agents = current_frame_agents # NOTE: No deepcopy + return {} + # if self.enable_tracking: + # self._update_tracking(current_frame_agents) + # else: + # self.tracked_agents = current_frame_agents # NOTE: No deepcopy - return self.tracked_agents + # return self.tracked_agents def _fuse_bounding_boxes(self, @@ -191,12 +228,12 @@ def _fuse_bounding_boxes(self, # Work in progress to visualize combined results fused_bb_array = BoundingBoxArray() fused_bb_array.header = original_header - fused_bb_array.boxes = fused_boxes_list for i, box in enumerate(fused_boxes_list): + fused_bb_array.boxes.append(box) + rospy.loginfo(len(fused_boxes_list)) + try: - fuxed_bb_array.boxes.append(box) - # Cur vehicle frame pos_x = box.pose.position.x; pos_y = box.pose.position.y; pos_z = box.pose.position.z quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 421a5cb79..9f76845f6 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -1,7 +1,3 @@ -# from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum -# from ..interface.gem import GEMInterface -# from ..component import Component -# from perception_utils import * from combined_detection_utils import * import numpy as np from scipy.spatial.transform import Rotation as R @@ -86,7 +82,7 @@ def initialize(self): # Initialize PointPillars node rospy.init_node('pointpillars_box_publisher') # Create bounding box publisher - self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=10) + self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=1) rospy.loginfo("PointPillars node initialized and waiting for messages.") # Initialize PointPillars @@ -171,6 +167,12 @@ def synchronized_callback(self, image_msg, lidar_msg): R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + print("printing") + print(z_vehicle) + print(h) + if (z_vehicle - h/2) < 0.0: + z_vehicle = h/2 + boxes = add_bounding_box(boxes=boxes, frame_id='currentVehicleFrame', stamp=lidar_msg.header.stamp, diff --git a/GEMstack/onboard/perception/setup/docker-compose.yaml b/GEMstack/onboard/perception/setup/docker-compose.yaml index d877104f2..d2738db54 100644 --- a/GEMstack/onboard/perception/setup/docker-compose.yaml +++ b/GEMstack/onboard/perception/setup/docker-compose.yaml @@ -16,7 +16,7 @@ services: volumes: # Mount the point_pillars_node.py file, shared helper 
functions, and model weights - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" - - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/combined_detection_utils.py:ro" + - "../combined_detection_utils.py:/home/ppuser/pointpillars_ws/combined_detection_utils.py:ro" - "../epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" # Mount host home directory (optional) - "~:/home/ppuser/host:ro" diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index b45a47a04..e6a689e57 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -51,7 +51,7 @@ def initialize(self): # Initialize YOLO node rospy.init_node('yolo_box_publisher') # Create bounding box publisher - self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=10) + self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1) rospy.loginfo("YOLO node initialized and waiting for messages.") if self.camera_front: From b3c020993b47ff68d9963629a48597791c17bf05 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Tue, 13 May 2025 20:59:21 -0500 Subject: [PATCH 37/55] Added id tracking to combined_detection.py. Decreased iou_threshold so bounding boxes are fused more often. --- .../onboard/perception/combined_detection.py | 132 ++++++++++-------- 1 file changed, 74 insertions(+), 58 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index a3cc5db42..f8a88ccdd 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -3,6 +3,7 @@ from ..component import Component # from .perception_utils import * from .pedestrian_utils_gem import * +from .pedestrian_utils import pose_to_matrix from typing import Dict import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer @@ -97,16 +98,18 @@ def __init__( vehicle_interface: GEMInterface, enable_tracking: bool = True, use_start_frame: bool = True, - iou_threshold: float = 0.1, + iou_threshold: float = 0.001, **kwargs ): self.vehicle_interface = vehicle_interface self.tracked_agents: Dict[str, AgentState] = {} - self.ped_counter = 0 + self.pedestrian_counter = 0 self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None self.latest_pp_bbxs: Optional[BoundingBoxArray] = None self.start_pose_abs: Optional[ObjectPose] = None self.start_time: Optional[float] = None + self.current_agents = {} + self.tracked_agents = {} self.enable_tracking = enable_tracking self.use_start_frame = use_start_frame @@ -162,16 +165,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: self.start_pose_abs = vehicle.pose rospy.loginfo("CombinedDetector3D latched start pose.") - current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) - - return {} - # if self.enable_tracking: - # self._update_tracking(current_frame_agents) - # else: - # self.tracked_agents = current_frame_agents # NOTE: No deepcopy - - # return self.tracked_agents - + return self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) def _fuse_bounding_boxes(self, yolo_bbx_array: BoundingBoxArray, @@ -180,7 +174,7 @@ def _fuse_bounding_boxes(self, current_time: float ) -> Dict[str, AgentState]: original_header = yolo_bbx_array.header - current_agents_in_frame: Dict[str, AgentState] = {} + agents: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes pp_boxes: 
List[BoundingBox] = pp_bbx_array.boxes @@ -239,63 +233,85 @@ def _fuse_bounding_boxes(self, quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - # Start frame - if self.use_start_frame and self.start_pose_abs is not None: - vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( - ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] - else: - final_x, final_y, final_z = pos_x, pos_y, pos_z + # Convert to start frame + vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( + ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] - final_pose = ObjectPose( + new_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z, yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum ) dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) - ######### Mapping based on label (integer) from BoundingBox msg - agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN # Needs refinement - activity = AgentActivityEnum.UNKNOWN # Placeholder - - # temp id - # _update_tracking assign persistent IDs - temp_agent_id = f"pedestrian{i}" - - current_agents_in_frame[temp_agent_id] = AgentState( - pose=final_pose, dimensions=dims, outline=None, type=agent_type, - activity=activity, velocity=(0.0,0.0,0.0), yaw_rate=0.0 - # score=box.value # score + + new_pose = ObjectPose( + t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, + yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum + ) + dims[2] = dims[2] * 2.0 # AgentState has z center on the floor and height is full height. 
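The tracking update below relies on two helpers, match_existing_pedestrian and compute_velocity, presumably pulled in through the pedestrian_utils_gem wildcard import and not shown in this patch. A minimal sketch of what the call sites suggest they do (names, signatures, and logic inferred here, so treat this as an assumption rather than that module's actual implementation):

    import numpy as np

    def match_existing_pedestrian(new_center, new_dims, existing_agents, distance_threshold=2.0):
        # Return the id of the closest tracked agent within distance_threshold, else None.
        # new_dims is accepted to mirror the call site; a fuller matcher might also compare sizes.
        best_id, best_dist = None, distance_threshold
        for agent_id, agent in existing_agents.items():
            old_center = np.array([agent.pose.x, agent.pose.y, agent.pose.z])
            dist = float(np.linalg.norm(np.asarray(new_center) - old_center))
            if dist < best_dist:
                best_id, best_dist = agent_id, dist
        return best_id

    def compute_velocity(old_pose, new_pose, dt):
        # Finite-difference velocity estimate between two pose samples.
        if dt <= 0:
            return (0.0, 0.0, 0.0)
        return ((new_pose.x - old_pose.x) / dt,
                (new_pose.y - old_pose.y) / dt,
                (new_pose.z - old_pose.z) / dt)

With a 2.0 m gate this amounts to per-frame nearest-neighbor association, which is consistent with the distance_threshold passed in the code that follows.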
+ + existing_id = match_existing_pedestrian( + new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), + new_dims=dims, + existing_agents=self.tracked_agents, + distance_threshold=2.0 ) + + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + dt = new_pose.t - old_state.pose.t + vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt) + updated_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(vx, vy, vz), + yaw_rate=0 + ) + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + except Exception as e: rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") continue self.pub_fused.publish(fused_bb_array) - return current_agents_in_frame - - - def _update_tracking(self, current_frame_agents: Dict[str, AgentState]): - - # Todo tracking - ## Match 'current_frame_agents' to 'self.tracked_agents'. - ## - Use position (already in correct START or CURRENT frame), maybe size/type. - ## - Need a matching algorithm (e.g., nearest neighbor within radius, Hungarian). - ## For matched pairs: - ## - Update the existing agent in 'self.tracked_agents' (e.g., smooth pose, update timestamp). - ## For unmatched 'current_frame_agents': - ## - These are new detections. Assign a persistent ID (e.g., f"Ped_{self.ped_counter}"). - ## - Increment self.ped_counter. - ## - Add them to 'self.tracked_agents'. - ## For unmatched 'self.tracked_agents' (agents not seen this frame): - ## - Increment a 'missed frames' counter or check timestamp. - ## - If missed for too long (e.g., > 1 second), remove from 'self.tracked_agents'. 
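The tracking notes just above (removed by this patch) name the Hungarian algorithm as an alternative to nearest-neighbor matching within a radius. A minimal sketch of that option, assuming SciPy is available and using plain center distance as the cost; this is an illustration of the technique, not code from the project:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    def associate_tracks(track_centers, detection_centers, max_distance=2.0):
        # Globally optimal one-to-one matching that minimizes total center distance.
        if len(track_centers) == 0 or len(detection_centers) == 0:
            return []
        cost = np.linalg.norm(
            np.asarray(track_centers)[:, None, :] - np.asarray(detection_centers)[None, :, :],
            axis=2)
        rows, cols = linear_sum_assignment(cost)
        # Reject pairs that are too far apart to plausibly be the same pedestrian.
        return [(int(r), int(c)) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]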
- - # return without tracking - self.tracked_agents = current_frame_agents + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + for agent_id, agent in agents.items(): + p = agent.pose + # Format pose and velocity with 3 decimals (or as needed) + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + ) + + return agents # Fake 2D Combined Detector for testing purposes From 6bc8a7986090035f4ff8ede307e0ed83d2eb69c3 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Tue, 13 May 2025 21:18:21 -0500 Subject: [PATCH 38/55] combined_detection.yaml Add model merging mode --- launch/combined_detection.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml index 8c8320992..856835e6d 100644 --- a/launch/combined_detection.yaml +++ b/launch/combined_detection.yaml @@ -18,6 +18,7 @@ drive: # optional overrides enable_tracking: True # True if you want to enable tracking function use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame + merge_mode: "Score" # Merging strategy to use: "Average", "Score", or "Max" perception_normalization : StandardPerceptionNormalizer planning: @@ -122,4 +123,4 @@ variants: relations_estimation: type: pedestrian_yield_logic.PedestrianYielder args: - mode: 'sim' \ No newline at end of file + mode: 'sim' From 7a5b685e2c49f32a5baabeab7a7f6052690c35e8 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Tue, 13 May 2025 21:19:12 -0500 Subject: [PATCH 39/55] combined_detection.py Add model output merging logic --- .../onboard/perception/combined_detection.py | 237 +++++++++++------- 1 file changed, 141 insertions(+), 96 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index f8a88ccdd..db0c2712e 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -3,7 +3,6 @@ from ..component import Component # from .perception_utils import * from .pedestrian_utils_gem import * -from .pedestrian_utils import pose_to_matrix from typing import Dict import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer @@ -51,30 +50,87 @@ def avg_orientations(orientation1, orientation2): return orientation -def merge_boxes(box1: BoundingBox, box2: BoundingBox) -> BoundingBox: - # TODO: merging - # Heuristics- Average pose - # Average dimensions - # Use highest score - # Label specific logic - merged_box = BoundingBox() - merged_box.header = box1.header # Use header from one input - - ## Avg position, average dimensions, max score, box1 label - merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 - merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 - merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0 +def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> BoundingBox: + """ + Merge two bounding boxes using the specified mode. 
+ + Args: + box1: First bounding box + box2: Second bounding box + mode: Merging strategy to use. Options include: + - "Average": Simple average of position, dimensions, and orientation (default) + - "Score": Weight average by confidence scores (box.value) + - "Max": Use the entire box with the highest confidence score + + Notes: + - The 'value' field in BoundingBox is used for the confidence score + - The label from the box with higher confidence is used when using Score mode + - In Average mode, the label from the first box is used + - Max mode returns a copy of the box with the highest confidence score + + Returns: + A merged bounding box + """ - # Avg orientations (quaternions) - merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + merged_box = BoundingBox() + merged_box.header = box1.header # Use header from one input + + # Get confidence scores + score1 = box1.value if hasattr(box1, 'value') else 0.0 + score2 = box2.value if hasattr(box2, 'value') else 0.0 + + if mode == "Max": + # Use the box with the higher confidence score entirely + if score1 >= score2: + return copy.deepcopy(box1) + else: + return copy.deepcopy(box2) + + elif mode == "Score": + # If both scores are 0, fall back to average + if score1 == 0.0 and score2 == 0.0: + weight1, weight2 = 0.5, 0.5 + else: + # Calculate normalized weights based on scores + total_score = score1 + score2 + weight1 = score1 / total_score + weight2 = score2 / total_score + + # Weighted average of position + merged_box.pose.position.x = (weight1 * box1.pose.position.x) + (weight2 * box2.pose.position.x) + merged_box.pose.position.y = (weight1 * box1.pose.position.y) + (weight2 * box2.pose.position.y) + merged_box.pose.position.z = (weight1 * box1.pose.position.z) + (weight2 * box2.pose.position.z) + + # Weighted average of dimensions + merged_box.dimensions.x = (weight1 * box1.dimensions.x) + (weight2 * box2.dimensions.x) + merged_box.dimensions.y = (weight1 * box1.dimensions.y) + (weight2 * box2.dimensions.y) + merged_box.dimensions.z = (weight1 * box1.dimensions.z) + (weight2 * box2.dimensions.z) + + # For orientation, we still use the average_orientations function + # A more advanced approach would be to implement weighted quaternion averaging + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + + # For label, use the one from the higher-score box + merged_box.label = box1.label if score1 >= score2 else box2.label + + else: # Default to "Average" mode + # Original averaging logic + merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 + merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 + merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0 + + # Avg orientations (quaternions) + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) - merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 - merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 - merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0 - merged_box.value = max(box1.value, box2.value) # Max score - merged_box.label = box1.label # Label from first box + merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 + merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 + merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0 + merged_box.label = box1.label # Label from first box 
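As a quick check of the "Score" mode implemented above, a hedged usage sketch (it assumes jsk_recognition_msgs is installed and merge_boxes is called from this module; identity orientations are set explicitly because avg_orientations cannot handle the all-zero default quaternion):

    from jsk_recognition_msgs.msg import BoundingBox

    b1, b2 = BoundingBox(), BoundingBox()
    b1.pose.position.x, b1.value, b1.pose.orientation.w = 0.0, 0.9, 1.0
    b2.pose.position.x, b2.value, b2.pose.orientation.w = 1.0, 0.3, 1.0

    merged = merge_boxes(b1, b2, mode="Score")
    # Weights are 0.9/1.2 = 0.75 and 0.3/1.2 = 0.25, so:
    # merged.pose.position.x == 0.25
    # merged.value == 0.9   (max of the two confidence scores)
    # merged.label == b1.label, since b1 has the higher score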
+ + # Always use max score for the merged box confidence value + merged_box.value = max(score1, score2) - return merged_box + return merged_box def get_aabb_corners(box: BoundingBox): """ @@ -98,28 +154,29 @@ def __init__( vehicle_interface: GEMInterface, enable_tracking: bool = True, use_start_frame: bool = True, - iou_threshold: float = 0.001, + iou_threshold: float = 0.1, + merge_mode: str = "Average", **kwargs ): self.vehicle_interface = vehicle_interface self.tracked_agents: Dict[str, AgentState] = {} - self.pedestrian_counter = 0 + self.ped_counter = 0 self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None self.latest_pp_bbxs: Optional[BoundingBoxArray] = None self.start_pose_abs: Optional[ObjectPose] = None self.start_time: Optional[float] = None - self.current_agents = {} - self.tracked_agents = {} self.enable_tracking = enable_tracking self.use_start_frame = use_start_frame self.iou_threshold = iou_threshold + self.merge_mode = merge_mode self.yolo_topic = '/yolo_boxes' self.pp_topic = '/pointpillars_boxes' self.debug = False rospy.loginfo(f"CombinedDetector3D Initialized. Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.") + rospy.loginfo(f"Using merge mode: {self.merge_mode}") def rate(self) -> float: return 8.0 @@ -165,7 +222,16 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: self.start_pose_abs = vehicle.pose rospy.loginfo("CombinedDetector3D latched start pose.") - return self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) + current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) + + return {} + # if self.enable_tracking: + # self._update_tracking(current_frame_agents) + # else: + # self.tracked_agents = current_frame_agents # NOTE: No deepcopy + + # return self.tracked_agents + def _fuse_bounding_boxes(self, yolo_bbx_array: BoundingBoxArray, @@ -174,7 +240,7 @@ def _fuse_bounding_boxes(self, current_time: float ) -> Dict[str, AgentState]: original_header = yolo_bbx_array.header - agents: Dict[str, AgentState] = {} + current_agents_in_frame: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes pp_boxes: List[BoundingBox] = pp_bbx_array.boxes @@ -204,7 +270,8 @@ def _fuse_bounding_boxes(self, rospy.logdebug(f"Matched YOLO box {i} with PP box {best_match_j} (IoU: {best_iou:.3f})") matched_yolo_indices.add(i) matched_pp_indices.add(best_match_j) - merged = merge_boxes(yolo_box, pp_boxes[best_match_j]) + rospy.logdebug(f"Using bbox merge mode: {self.merge_mode} for boxes with scores {yolo_box.value:.2f} and {pp_boxes[best_match_j].value:.2f}") + merged = merge_boxes(yolo_box, pp_boxes[best_match_j], mode=self.merge_mode) fused_boxes_list.append(merged) ## UAdd the unmatched YOLO boxes @@ -233,85 +300,63 @@ def _fuse_bounding_boxes(self, quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - # Convert to start frame - vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( - ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] + # Start frame + if self.use_start_frame and self.start_pose_abs is not None: + 
vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( + ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] + else: + final_x, final_y, final_z = pos_x, pos_y, pos_z - new_pose = ObjectPose( + final_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z, yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum ) dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) - - new_pose = ObjectPose( - t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, - yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum - ) - dims[2] = dims[2] * 2.0 # AgentState has z center on the floor and height is full height. - - existing_id = match_existing_pedestrian( - new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), - new_dims=dims, - existing_agents=self.tracked_agents, - distance_threshold=2.0 + ######### Mapping based on label (integer) from BoundingBox msg + agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN # Needs refinement + activity = AgentActivityEnum.UNKNOWN # Placeholder + + # temp id + # _update_tracking assign persistent IDs + temp_agent_id = f"pedestrian{i}" + + current_agents_in_frame[temp_agent_id] = AgentState( + pose=final_pose, dimensions=dims, outline=None, type=agent_type, + activity=activity, velocity=(0.0,0.0,0.0), yaw_rate=0.0 + # score=box.value # score ) - - if existing_id is not None: - old_state = self.tracked_agents[existing_id] - dt = new_pose.t - old_state.pose.t - vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt) - updated_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity=(vx, vy, vz), - yaw_rate=0 - ) - agents[existing_id] = updated_agent - self.tracked_agents[existing_id] = updated_agent - else: - agent_id = f"pedestrian{self.pedestrian_counter}" - self.pedestrian_counter += 1 - new_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity=(0, 0, 0), - yaw_rate=0 - ) - agents[agent_id] = new_agent - self.tracked_agents[agent_id] = new_agent - except Exception as e: rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") continue self.pub_fused.publish(fused_bb_array) + return current_agents_in_frame + + + def _update_tracking(self, current_frame_agents: Dict[str, AgentState]): + + # Todo tracking + ## Match 'current_frame_agents' to 'self.tracked_agents'. + ## - Use position (already in correct START or CURRENT frame), maybe size/type. + ## - Need a matching algorithm (e.g., nearest neighbor within radius, Hungarian). + ## For matched pairs: + ## - Update the existing agent in 'self.tracked_agents' (e.g., smooth pose, update timestamp). + ## For unmatched 'current_frame_agents': + ## - These are new detections. Assign a persistent ID (e.g., f"Ped_{self.ped_counter}"). + ## - Increment self.ped_counter. + ## - Add them to 'self.tracked_agents'. + ## For unmatched 'self.tracked_agents' (agents not seen this frame): + ## - Increment a 'missed frames' counter or check timestamp. + ## - If missed for too long (e.g., > 1 second), remove from 'self.tracked_agents'. 
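The last bullet above (dropping tracks that have not been seen for a while) reduces to a small pruning step. A minimal sketch, assuming each AgentState stores its last update time in pose.t and that a one-second timeout is acceptable; the helper name is hypothetical:

    def prune_stale_tracks(tracked_agents, now, max_age=1.0):
        # Keep only agents whose most recent update is within max_age seconds of 'now'.
        return {agent_id: agent for agent_id, agent in tracked_agents.items()
                if now - agent.pose.t <= max_age}

Reassigning self.tracked_agents to the result at the end of each update would be one way to realize the removal the comment describes.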
+ + # return without tracking + self.tracked_agents = current_frame_agents - stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - agent.pose.t > 5.0] - for agent_id in stale_ids: - rospy.loginfo(f"Removing stale agent: {agent_id}\n") - for agent_id, agent in agents.items(): - p = agent.pose - # Format pose and velocity with 3 decimals (or as needed) - rospy.loginfo( - f"Agent ID: {agent_id}\n" - f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " - f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - ) - - return agents # Fake 2D Combined Detector for testing purposes From f95cc0c7fd33aac199da22deb7c938816b30c413 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Tue, 13 May 2025 23:45:20 -0500 Subject: [PATCH 40/55] point_pillars_node.py clean up --- .../onboard/perception/point_pillars_node.py | 82 +++++++++++-------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 9f76845f6..22a49ae37 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -1,4 +1,4 @@ -from combined_detection_utils import * +from combined_detection_utils import add_bounding_box import numpy as np from scipy.spatial.transform import Rotation as R import rospy @@ -50,25 +50,24 @@ def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): return pts[mask] -class PointPillarsNode(): +class PointPillarsNode: """ Detects Pedestrians with PointPillars and publishes the results in vehicle frame. 
""" - def __init__( - self, - ): - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() + def __init__(self): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() self.camera_name = 'front' - self.camera_front = (self.camera_name=='front') + self.camera_front = (self.camera_name == 'front') self.score_threshold = 0.4 self.debug = True self.initialize() def initialize(self): - # --- Determine the correct RGB topic for this camera --- + """Initialize the PointPillars node with model and ROS connections.""" + # Determine the correct RGB topic for this camera rgb_topic_map = { 'front': '/oak/rgb/image_raw', 'front_right': '/camera_fr/arena_camera_node/image_raw', @@ -81,6 +80,7 @@ def initialize(self): # Initialize PointPillars node rospy.init_node('pointpillars_box_publisher') + # Create bounding box publisher self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=1) rospy.loginfo("PointPillars node initialized and waiting for messages.") @@ -88,45 +88,53 @@ def initialize(self): # Initialize PointPillars device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.pointpillars = PointPillars( - nclasses=3, - voxel_size=[0.16, 0.16, 4], - point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], - max_num_points=32, - max_voxels=(16000, 40000) - ) + nclasses=3, + voxel_size=[0.16, 0.16, 4], + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + max_num_points=32, + max_voxels=(16000, 40000) + ) self.pointpillars.to(device) + # Load model weights model_path = 'epoch_160.pth' - checkpoint = torch.load(model_path) #, map_location='cuda' if torch.cuda.is_available() else 'cpu') + checkpoint = torch.load(model_path) self.pointpillars.load_state_dict(checkpoint) - self.pointpillars.eval() rospy.loginfo("PointPillars model loaded successfully") - self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0., 0., 0., 1.]]) + # Transformation matrix from LiDAR to vehicle frame + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.] 
+ ]) # Subscribe to the RGB and LiDAR streams self.rgb_sub = Subscriber(rgb_topic, Image) self.lidar_sub = Subscriber('/ouster/points', PointCloud2) - self.sync = ApproximateTimeSynchronizer([ - self.rgb_sub, self.lidar_sub - ], queue_size=10, slop=0.1) + self.sync = ApproximateTimeSynchronizer( + [self.rgb_sub, self.lidar_sub], + queue_size=10, + slop=0.1 + ) self.sync.registerCallback(self.synchronized_callback) def synchronized_callback(self, image_msg, lidar_msg): + """Process synchronized RGB and LiDAR messages to detect pedestrians.""" rospy.loginfo("Received synchronized RGB and LiDAR messages") + + # Convert LiDAR message to numpy array self.latest_lidar = pc2_to_numpy_with_intensity(lidar_msg, want_rgb=False) downsample = False - if downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() + # Create empty list of bounding boxes to fill boxes = BoundingBoxArray() boxes.header.frame_id = 'currentVehicleFrame' boxes.header.stamp = lidar_msg.header.stamp @@ -167,24 +175,27 @@ def synchronized_callback(self, image_msg, lidar_msg): R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - print("printing") - print(z_vehicle) - print(h) + # print("printing") + # print(z_vehicle) + # print(h) + # Ensure the box doesn't go below ground level if (z_vehicle - h/2) < 0.0: z_vehicle = h/2 - boxes = add_bounding_box(boxes=boxes, + # Add the bounding box + boxes = add_bounding_box( + boxes=boxes, frame_id='currentVehicleFrame', stamp=lidar_msg.header.stamp, x=x_vehicle, y=y_vehicle, z=z_vehicle, - l=l, # length - w=w, # width - h=h, # height + l=l, # length + w=w, # width + h=h, # height yaw=vehicle_yaw, conf_score=score, - label=label # person/pedestrian class + label=label # person/pedestrian class ) rospy.loginfo(f"Pedestrian detected at ({x:.2f}, {y:.2f}, {z:.2f}) with score {score:.2f}") @@ -193,9 +204,10 @@ def synchronized_callback(self, image_msg, lidar_msg): rospy.loginfo(f"Publishing {len(boxes.boxes)} pedestrian bounding boxes") self.pub.publish(boxes) + if __name__ == '__main__': try: node = PointPillarsNode() rospy.spin() except rospy.ROSInterruptException: - pass \ No newline at end of file + pass From c2f2b2b63283ab059d35e339200cff83bce26ace Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Tue, 13 May 2025 23:45:53 -0500 Subject: [PATCH 41/55] yolo_node.py update transform matrices + clean up --- GEMstack/onboard/perception/yolo_node.py | 90 ++++++++++++++---------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index e6a689e57..9792d5172 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -24,20 +24,21 @@ class YoloNode(): and return only detections from the current frame. 
""" - def __init__( - self, - ): - self.latest_image = None - self.latest_lidar = None - self.bridge = CvBridge() + def __init__(self): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() self.camera_name = 'front' - self.camera_front = (self.camera_name=='front') + self.camera_front = (self.camera_name == 'front') self.score_threshold = 0.4 self.debug = True + # self.undistort_map1 = None + # self.undistort_map2 = None self.initialize() def initialize(self): - # --- Determine the correct RGB topic for this camera --- + """Initialize the YOLO node with camera calibration and ROS connections.""" + # # --- Determine the correct RGB topic for this camera --- rgb_topic_map = { 'front': '/oak/rgb/image_raw', 'front_right': '/camera_fr/arena_camera_node/image_raw', @@ -50,29 +51,31 @@ def initialize(self): # Initialize YOLO node rospy.init_node('yolo_box_publisher') + # Create bounding box publisher self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1) rospy.loginfo("YOLO node initialized and waiting for messages.") + # Set camera intrinsic parameters based on camera if self.camera_front: self.K = np.array([[684.83331299, 0., 573.37109375], [0., 684.60968018, 363.70092773], [0., 0., 1.]]) + self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) else: self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) + self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, + -7.41246708e-05, -6.19939758e-02]) - if self.camera_front: - self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) - else: - self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, -7.41246708e-05, - -6.19939758e-02]) - + # Transformation matrix from LiDAR to vehicle frame self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], [-0.02530848, 0.99965156, -0.00749882, 0.03773583], [-0.02379784, 0.00689664, 0.999693, 1.95320223], [0., 0., 0., 1.]]) + + # Transformation matrix from LiDAR to camera frame if self.camera_front: self.T_l2c = np.array([ [0.001090, -0.999489, -0.031941, 0.149698], @@ -86,12 +89,14 @@ def initialize(self): [0.68884317, -0.7061996, -0.16363744, -1.04767285], [0., 0., 0., 1.]] ) + + # Compute inverse transformation (camera to LiDAR) self.T_c2l = np.linalg.inv(self.T_l2c) self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] - # Initialize the YOLO detector - self.detector = YOLO('yolov8n.pt') # 'GEMstack/knowledge/detection/cone.pt') + # Initialize the YOLO detector and move to GPU if available + self.detector = YOLO('yolov8n.pt') self.detector.to('cuda') # Subscribe to the RGB and LiDAR streams @@ -103,28 +108,31 @@ def initialize(self): self.sync.registerCallback(self.synchronized_callback) def synchronized_callback(self, image_msg, lidar_msg): + """Process synchronized RGB and LiDAR messages to detect pedestrians.""" rospy.loginfo("Received synchronized RGB and LiDAR messages") + + # Convert image message to OpenCV format try: self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") except Exception as e: - rospy.logerr("Failed to convert image: {}".format(e)) + rospy.logerr(f"Failed to convert image: {e}") self.latest_image = None + + # Convert LiDAR message to numpy array self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) # Gate guards against data not being present for both sensors: if self.latest_image is None or self.latest_lidar is None: - return {} - lastest_image = 
self.latest_image.copy() + return {} # Skip + + latest_image = self.latest_image.copy() + # Optionally downsample LiDAR points downsample = False if downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() - - # if self.start_time is None: - # self.start_time = current_time - # time_elapsed = current_time - self.start_time if self.camera_front == False: start = time.time() @@ -137,10 +145,12 @@ def synchronized_callback(self, image_msg, lidar_msg): # --- Begin modifications for three-angle detection --- img_normal = undistorted_img else: - img_normal = lastest_image.copy() - undistorted_img = lastest_image.copy() - orig_H, orig_W = lastest_image.shape[:2] + img_normal = latest_image.copy() + undistorted_img = latest_image.copy() + orig_H, orig_W = latest_image.shape[:2] self.current_K = self.K + + # Run YOLO detection on the image results_normal = self.detector(img_normal, conf=0.4, classes=[0]) combined_boxes = [] @@ -159,6 +169,7 @@ def synchronized_callback(self, image_msg, lidar_msg): # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + # Create empty list of bounding boxes to fill and publish later boxes = BoundingBoxArray() boxes.header.frame_id = 'currentVehicleFrame' @@ -181,8 +192,8 @@ def synchronized_callback(self, image_msg, lidar_msg): bottom = int(cy + h / 2) # Find LiDAR points that project to this box - mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ - (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom)) roi_pts = projected_pts[mask] # Ignore regions with too few points @@ -220,31 +231,36 @@ def synchronized_callback(self, image_msg, lidar_msg): # yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0]) - refined_center = refined_center_vehicle - - boxes = add_bounding_box(boxes=boxes, + # Add the bounding box + boxes = add_bounding_box( + boxes=boxes, frame_id='currentVehicleFrame', stamp=lidar_msg.header.stamp, x=refined_center_vehicle[0], y=refined_center_vehicle[1], z=refined_center_vehicle[2], - l=dims[2], # length - w=dims[1], # width - h=dims[0], # height + l=dims[2], # length + w=dims[1], # width + h=dims[0], # height yaw=yaw, conf_score=float(conf_scores[i]), - label=0 # person/pedestrian class + label=0 # person/pedestrian class ) - rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, {refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) with score {conf_scores[i]:.2f}") + rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, " + f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) " + f"with score {conf_scores[i]:.2f}") # Publish the bounding boxes rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes") self.pub.publish(boxes) def undistort_image(self, image, K, D): + """Undistort an image using the camera calibration parameters.""" h, w = image.shape[:2] newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + + # Initialize undistortion maps if not already done if self.undistort_map1 is None or self.undistort_map2 is None: self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, newCameraMatrix=newK, size=(w, h), @@ -262,4 +278,4 @@ def 
undistort_image(self, image, K, D): node = YoloNode() rospy.spin() except rospy.ROSInterruptException: - pass \ No newline at end of file + pass From b990e355dac3802f44b5694ee7917c08befc47b7 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Tue, 13 May 2025 23:46:37 -0500 Subject: [PATCH 42/55] combined_detection.py clean up --- .../onboard/perception/combined_detection.py | 96 +++++++++++++------ 1 file changed, 67 insertions(+), 29 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index db0c2712e..f2eb529d5 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -3,13 +3,12 @@ from ..component import Component # from .perception_utils import * from .pedestrian_utils_gem import * -from typing import Dict +from typing import Dict, List, Optional, Tuple import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer import time import os import yaml -from typing import Dict, List, Optional, Tuple import numpy as np from scipy.spatial.transform import Rotation as R from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray @@ -25,12 +24,14 @@ def average_yaw(yaw1, yaw2): v_avg = (v1 + v2) / 2.0 return np.arctan2(v_avg[1], v_avg[0]) + def quaternion_to_yaw(quaternion_arr): return R.from_quat(quaternion_arr).as_euler('zyx', degrees=False)[0] + def avg_orientations(orientation1, orientation2): + """Average two quaternion orientations by converting to yaw and back.""" # This function assumes both quaternions are 2D planar rotations around the z axis - quat1 = [orientation1.x, orientation1.y, orientation1.z, orientation1.w] quat2 = [orientation2.x, orientation2.y, orientation2.z, orientation2.w] @@ -50,6 +51,7 @@ def avg_orientations(orientation1, orientation2): return orientation + def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> BoundingBox: """ Merge two bounding boxes using the specified mode. @@ -107,7 +109,7 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> merged_box.dimensions.z = (weight1 * box1.dimensions.z) + (weight2 * box2.dimensions.z) # For orientation, we still use the average_orientations function - # A more advanced approach would be to implement weighted quaternion averaging + # More advanced: implement weighted quaternion averaging merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) # For label, use the one from the higher-score box @@ -132,12 +134,15 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> return merged_box + def get_aabb_corners(box: BoundingBox): """ - Calculates the 3D Intersection over Union (IoU) between two bounding boxes. - This implementation uses axis-aligned bounding boxes so it does not consider rotation. + Get axis-aligned bounding box corners for IoU calculation. + Use axis-aligned bounding boxes so it does not consider rotation. 
+ + Returns: + min_x, max_x, min_y, max_y, min_z, max_z coordinates """ - # Extract position and dimensions from each box cx, cy, cz = box.pose.position.x, box.pose.position.y, box.pose.position.z l, w, h = box.dimensions.x, box.dimensions.y, box.dimensions.z @@ -145,10 +150,20 @@ def get_aabb_corners(box: BoundingBox): # min_x, max_x, min_y, max_y, min_z, max_z return cx, cx + l, cy, cy + w, cz, cz + h + def get_volume(box): + """Calculate the volume of a bounding box.""" return box.dimensions.x * box.dimensions.y * box.dimensions.z + class CombinedDetector3D(Component): + """ + Combines detections from multiple 3D object detectors (YOLO and PointPillars). + + This component subscribes to bounding box outputs from YOLO and PointPillars, + fuses overlapping detections, and outputs a unified set of 3D bounding boxes. + """ + def __init__( self, vehicle_interface: GEMInterface, @@ -158,13 +173,13 @@ def __init__( merge_mode: str = "Average", **kwargs ): - self.vehicle_interface = vehicle_interface + self.vehicle_interface = vehicle_interface self.tracked_agents: Dict[str, AgentState] = {} - self.ped_counter = 0 + self.ped_counter = 0 self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None self.latest_pp_bbxs: Optional[BoundingBoxArray] = None - self.start_pose_abs: Optional[ObjectPose] = None - self.start_time: Optional[float] = None + self.start_pose_abs: Optional[ObjectPose] = None + self.start_time: Optional[float] = None self.enable_tracking = enable_tracking self.use_start_frame = use_start_frame @@ -188,6 +203,7 @@ def state_outputs(self) -> list: return ['agents'] def initialize(self): + """Initialize subscribers and publishers.""" self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray) self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray) self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1) @@ -204,10 +220,12 @@ def initialize(self): rospy.loginfo("CombinedDetector3D Subscribers Initialized.") def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray): + """Callback for synchronized YOLO and PointPillars messages.""" self.latest_yolo_bbxs = yolo_bbxs_msg self.latest_pp_bbxs = pp_bbxs_msg def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + """Update function called by the GEMstack pipeline.""" current_time = self.vehicle_interface.time() yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs) @@ -225,11 +243,11 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) return {} + # Tracking disabled for now # if self.enable_tracking: # self._update_tracking(current_frame_agents) # else: # self.tracked_agents = current_frame_agents # NOTE: No deepcopy - # return self.tracked_agents @@ -239,6 +257,18 @@ def _fuse_bounding_boxes(self, vehicle_state: VehicleState, current_time: float ) -> Dict[str, AgentState]: + """ + Fuse bounding boxes from multiple detectors. 
+ + Args: + yolo_bbx_array: Bounding boxes from YOLO detector + pp_bbx_array: Bounding boxes from PointPillars detector + vehicle_state: Current vehicle state + current_time: Current timestamp + + Returns: + Dictionary of agent states + """ original_header = yolo_bbx_array.header current_agents_in_frame: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes @@ -270,11 +300,11 @@ def _fuse_bounding_boxes(self, rospy.logdebug(f"Matched YOLO box {i} with PP box {best_match_j} (IoU: {best_iou:.3f})") matched_yolo_indices.add(i) matched_pp_indices.add(best_match_j) - rospy.logdebug(f"Using bbox merge mode: {self.merge_mode} for boxes with scores {yolo_box.value:.2f} and {pp_boxes[best_match_j].value:.2f}") + rospy.logdebug(f"Using merge mode: {self.merge_mode} for boxes with scores {yolo_box.value:.2f} and {pp_boxes[best_match_j].value:.2f}") merged = merge_boxes(yolo_box, pp_boxes[best_match_j], mode=self.merge_mode) fused_boxes_list.append(merged) - ## UAdd the unmatched YOLO boxes + ## Add the unmatched YOLO boxes for i, yolo_box in enumerate(yolo_boxes): if i not in matched_yolo_indices: fused_boxes_list.append(yolo_box) @@ -295,22 +325,27 @@ def _fuse_bounding_boxes(self, rospy.loginfo(len(fused_boxes_list)) try: - # Cur vehicle frame - pos_x = box.pose.position.x; pos_y = box.pose.position.y; pos_z = box.pose.position.z - quat_x = box.pose.orientation.x; quat_y = box.pose.orientation.y; quat_z = box.pose.orientation.z; quat_w = box.pose.orientation.w + # Get position and orientation in current vehicle frame + pos_x = box.pose.position.x + pos_y = box.pose.position.y + pos_z = box.pose.position.z + quat_x = box.pose.orientation.x + quat_y = box.pose.orientation.y + quat_z = box.pose.orientation.z + quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - # Start frame + # Transform to start frame if needed if self.use_start_frame and self.start_pose_abs is not None: - vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( - ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] + vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( + ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] else: - final_x, final_y, final_z = pos_x, pos_y, pos_z + final_x, final_y, final_z = pos_x, pos_y, pos_z final_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z, @@ -318,8 +353,9 @@ def _fuse_bounding_boxes(self, ) dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) ######### Mapping based on label (integer) from BoundingBox msg - agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN # Needs refinement - activity = AgentActivityEnum.UNKNOWN # Placeholder + # Set agent type based on label + agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN + activity = AgentActivityEnum.UNKNOWN # Placeholder # temp id # _update_tracking assign persistent IDs @@ -360,8 +396,9 @@ def _update_tracking(self, 
current_frame_agents: Dict[str, AgentState]): # Fake 2D Combined Detector for testing purposes -# TODO FIX THIS class FakeCombinedDetector2D(Component): + """Test detector that simulates cone detections at fixed time intervals.""" + def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface self.times = [(5.0, 20.0), (30.0, 35.0)] @@ -389,6 +426,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: def box_to_fake_agent(box): + """Convert 2D bounding box to a fake agent state.""" x, y, w, h = box pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) From aa321cd3152c536bf0e3ee110cf88eeaf0f9b07c Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 07:08:20 -0500 Subject: [PATCH 43/55] Added back the code which converts AgentStates to start frame and tracks them --- .../onboard/perception/combined_detection.py | 127 ++++++++++-------- 1 file changed, 70 insertions(+), 57 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index f2eb529d5..c52be2be5 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,7 +1,7 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -# from .perception_utils import * +from .pedestrian_utils import pose_to_matrix from .pedestrian_utils_gem import * from typing import Dict, List, Optional, Tuple import rospy @@ -240,15 +240,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: self.start_pose_abs = vehicle.pose rospy.loginfo("CombinedDetector3D latched start pose.") - current_frame_agents = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) - - return {} - # Tracking disabled for now - # if self.enable_tracking: - # self._update_tracking(current_frame_agents) - # else: - # self.tracked_agents = current_frame_agents # NOTE: No deepcopy - # return self.tracked_agents + return self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) def _fuse_bounding_boxes(self, @@ -270,7 +262,7 @@ def _fuse_bounding_boxes(self, Dictionary of agent states """ original_header = yolo_bbx_array.header - current_agents_in_frame: Dict[str, AgentState] = {} + agents: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes pp_boxes: List[BoundingBox] = pp_bbx_array.boxes @@ -335,64 +327,85 @@ def _fuse_bounding_boxes(self, quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - # Transform to start frame if needed - if self.use_start_frame and self.start_pose_abs is not None: - vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( - ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] - else: - final_x, final_y, final_z = pos_x, pos_y, pos_z + # Convert to start frame + vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( + ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs + ) + T_vehicle_to_start = 
pose_to_matrix(vehicle_pose_in_start_frame) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] - final_pose = ObjectPose( + new_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z, yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum ) dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) - ######### Mapping based on label (integer) from BoundingBox msg - # Set agent type based on label - agent_type = AgentEnum.PEDESTRIAN if box.label == 0 else AgentEnum.UNKNOWN - activity = AgentActivityEnum.UNKNOWN # Placeholder - - # temp id - # _update_tracking assign persistent IDs - temp_agent_id = f"pedestrian{i}" - - current_agents_in_frame[temp_agent_id] = AgentState( - pose=final_pose, dimensions=dims, outline=None, type=agent_type, - activity=activity, velocity=(0.0,0.0,0.0), yaw_rate=0.0 - # score=box.value # score + + new_pose = ObjectPose( + t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, + yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum + ) + dims[2] = dims[2] * 2.0 # AgentState has z center on the floor and height is full height. + + existing_id = match_existing_pedestrian( + new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), + new_dims=dims, + existing_agents=self.tracked_agents, + distance_threshold=2.0 ) + + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + dt = new_pose.t - old_state.pose.t + vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt) + updated_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(vx, vy, vz), + yaw_rate=0 + ) + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"pedestrian{self.ped_counter}" + self.ped_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + except Exception as e: rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") continue self.pub_fused.publish(fused_bb_array) - return current_agents_in_frame - - - def _update_tracking(self, current_frame_agents: Dict[str, AgentState]): - - # Todo tracking - ## Match 'current_frame_agents' to 'self.tracked_agents'. - ## - Use position (already in correct START or CURRENT frame), maybe size/type. - ## - Need a matching algorithm (e.g., nearest neighbor within radius, Hungarian). - ## For matched pairs: - ## - Update the existing agent in 'self.tracked_agents' (e.g., smooth pose, update timestamp). - ## For unmatched 'current_frame_agents': - ## - These are new detections. Assign a persistent ID (e.g., f"Ped_{self.ped_counter}"). - ## - Increment self.ped_counter. - ## - Add them to 'self.tracked_agents'. - ## For unmatched 'self.tracked_agents' (agents not seen this frame): - ## - Increment a 'missed frames' counter or check timestamp. - ## - If missed for too long (e.g., > 1 second), remove from 'self.tracked_agents'. 
- - # return without tracking - self.tracked_agents = current_frame_agents + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + for agent_id, agent in agents.items(): + p = agent.pose + # Format pose and velocity with 3 decimals (or as needed) + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + ) + + return agents # Fake 2D Combined Detector for testing purposes From 1db9aefa94474c434d3dd0e86b81b4ad119d619a Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 07:15:56 -0500 Subject: [PATCH 44/55] Made it so Point Pillars bounding box bottoms are cut at ground level --- GEMstack/onboard/perception/point_pillars_node.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 22a49ae37..89db38f71 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -175,12 +175,13 @@ def synchronized_callback(self, image_msg, lidar_msg): R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - # print("printing") - # print(z_vehicle) - # print(h) # Ensure the box doesn't go below ground level - if (z_vehicle - h/2) < 0.0: - z_vehicle = h/2 + bottom = z_vehicle - h / 2.0 + if bottom < 0.0: + bottom = 0.0 # Can modify this later if we want a small seperation between the bounding box and ground + top = z_vehicle + h / 2.0 + z_vehicle = (top - bottom) / 2.0 + h = abs(top - center) # Add the bounding box boxes = add_bounding_box( From b7a5fd078905add2d3e932d386e62dad7da8da8d Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 08:50:28 -0500 Subject: [PATCH 45/55] Fixed small typo in point_pillars_node.py --- GEMstack/onboard/perception/point_pillars_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 89db38f71..43ee9551c 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -181,7 +181,7 @@ def synchronized_callback(self, image_msg, lidar_msg): bottom = 0.0 # Can modify this later if we want a small seperation between the bounding box and ground top = z_vehicle + h / 2.0 z_vehicle = (top - bottom) / 2.0 - h = abs(top - center) + h = abs(top - z_vehicle) # Add the bounding box boxes = add_bounding_box( From 3bb641b22e55af2934dc4cbe6dc63c30a1e5208a Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 09:26:17 -0500 Subject: [PATCH 46/55] Added vertical bounding box shift option to Point Pillars boxes --- GEMstack/onboard/perception/point_pillars_node.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 43ee9551c..784643deb 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -175,11 +175,17 @@ def synchronized_callback(self, image_msg, lidar_msg): 
R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) - # Ensure the box doesn't go below ground level bottom = z_vehicle - h / 2.0 if bottom < 0.0: - bottom = 0.0 # Can modify this later if we want a small seperation between the bounding box and ground + # Cut the box at ground level: + # bottom = 0.0 # Can modify this later if we want a small seperation between the bounding box and ground + # top = z_vehicle + h / 2.0 + + # Or Shift the box vertically to make it sit at ground level: top = z_vehicle + h / 2.0 + top = top + abs(bottom) + bottom = 0.0 + z_vehicle = (top - bottom) / 2.0 h = abs(top - z_vehicle) From a741ef0a4a0496cd61be5c0dbb40119670b6f127 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 10:54:16 -0500 Subject: [PATCH 47/55] Added BEV bounding box fusing. Fixed a bug that was in the combined_detection.py that would trigger an exception --- .../onboard/perception/combined_detection.py | 282 ++++++++++-------- .../onboard/perception/point_pillars_node.py | 11 +- launch/combined_detection.yaml | 2 +- 3 files changed, 172 insertions(+), 123 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index c52be2be5..dd5c8dd99 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -114,6 +114,24 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> # For label, use the one from the higher-score box merged_box.label = box1.label if score1 >= score2 else box2.label + + elif mode == "BEV": + # Merge the bounding boxes from Bird's Eye View (BEV) + # Average the x and y centers and dimensions + # Average the yaw orientation + # Use YOLO bounding box (box1) for z dimension and z center + merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 + merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 + merged_box.pose.position.z = copy.deepcopy(box1.pose.orientation) + + # Avg orientations (quaternions) + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + + merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 + merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 + merged_box.dimensions.z = copy.deepcopy(box1.dimensions.z) + + merged_box.label = box1.label # Label from first box else: # Default to "Average" mode # Original averaging logic @@ -156,6 +174,45 @@ def get_volume(box): return box.dimensions.x * box.dimensions.y * box.dimensions.z +def get_bev_aabb_corners(box: BoundingBox): + """ + Get axis-aligned bounding box corners for 2D Bird's Eye View IoU calculation. + Returns: + min_x, max_x, min_y, max_y + """ + cx, cy = box.pose.position.x, box.pose.position.y + l, w = box.dimensions.x, box.dimensions.y + + return cx, cx + l, cy, cy + w + + +def calculate_bev_iou(box1: BoundingBox, box2: BoundingBox): + """ + Calculates the 2D Bird's Eye View IoU between two bounding boxes. + Ignores z-axis and yaw (assumes axis aligned bounding boxes). 
+ """ + min_x1, max_x1, min_y1, max_y1 = get_bev_aabb_corners(box1) + min_x2, max_x2, min_y2, max_y2 = get_bev_aabb_corners(box2) + + # Calculate intersection in BEV + inter_min_x = max(min_x1, min_x2) + inter_max_x = min(max_x1, max_x2) + inter_min_y = max(min_y1, min_y2) + inter_max_y = min(max_y1, max_y2) + + inter_w = max(0, inter_max_x - inter_min_x) + inter_h = max(0, inter_max_y - inter_min_y) + intersection_area = inter_w * inter_h + + # Calculate union area + area1 = max(box1.dimensions.x * box1.dimensions.y, 1e-6) + area2 = max(box2.dimensions.x * box2.dimensions.y, 1e-6) + union_area = max(area1 + area2 - intersection_area, 1e-6) + + iou = intersection_area / union_area + return max(0.0, min(iou, 1.0)) # Clamp to [0, 1] + + class CombinedDetector3D(Component): """ Combines detections from multiple 3D object detectors (YOLO and PointPillars). @@ -185,6 +242,7 @@ def __init__( self.use_start_frame = use_start_frame self.iou_threshold = iou_threshold self.merge_mode = merge_mode + self.merge_in_bev = (merge_mode == "BEV") self.yolo_topic = '/yolo_boxes' self.pp_topic = '/pointpillars_boxes' @@ -227,6 +285,13 @@ def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: Bo def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: """Update function called by the GEMstack pipeline.""" current_time = self.vehicle_interface.time() + agents: Dict[str, AgentState] = {} + + if self.start_time is None: + self.start_time = current_time + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + rospy.loginfo("CombinedDetector3D latched start pose.") yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs) pp_bbx_array = copy.deepcopy(self.latest_pp_bbxs) @@ -234,40 +299,117 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: if yolo_bbx_array is None or pp_bbx_array is None: return {} - if self.start_time is None: - self.start_time = current_time - if self.use_start_frame and self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - rospy.loginfo("CombinedDetector3D latched start pose.") + original_header = yolo_bbx_array.header + fused_boxes_list = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array) - return self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array, vehicle, current_time) + # Used to visualize the combined results in the current frame + fused_bb_array = BoundingBoxArray() + fused_bb_array.header = original_header + + for i, box in enumerate(fused_boxes_list): + fused_bb_array.boxes.append(box) + rospy.loginfo(len(fused_boxes_list)) + + # Get position and orientation in current vehicle frame + pos_x = box.pose.position.x + pos_y = box.pose.position.y + pos_z = box.pose.position.z + quat_x = box.pose.orientation.x + quat_y = box.pose.orientation.y + quat_z = box.pose.orientation.z + quat_w = box.pose.orientation.w + yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) + + # Convert to start frame + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs + ) + T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] + + new_pose = ObjectPose( + t=current_time, x=final_x, y=final_y, z=final_z, + yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START + ) + dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z * 2.0) # 
AgentState has z center on the floor and height is full height. + + new_pose = ObjectPose( + t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, + yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START + ) + + existing_id = match_existing_pedestrian( + new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), + new_dims=dims, + existing_agents=self.tracked_agents, + distance_threshold=2.0 + ) + + if existing_id is not None: + old_state = self.tracked_agents[existing_id] + dt = new_pose.t - old_state.pose.t + vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt) + updated_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(vx, vy, vz), + yaw_rate=0 + ) + agents[existing_id] = updated_agent + self.tracked_agents[existing_id] = updated_agent + else: + agent_id = f"pedestrian{self.ped_counter}" + self.ped_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[agent_id] = new_agent + self.tracked_agents[agent_id] = new_agent + + self.pub_fused.publish(fused_bb_array) + + stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() + if current_time - agent.pose.t > 5.0] + for agent_id in stale_ids: + rospy.loginfo(f"Removing stale agent: {agent_id}\n") + for agent_id, agent in agents.items(): + p = agent.pose + # Format pose and velocity with 3 decimals (or as needed) + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + ) + + return agents def _fuse_bounding_boxes(self, yolo_bbx_array: BoundingBoxArray, pp_bbx_array: BoundingBoxArray, - vehicle_state: VehicleState, - current_time: float - ) -> Dict[str, AgentState]: + ): """ Fuse bounding boxes from multiple detectors. 
Args: yolo_bbx_array: Bounding boxes from YOLO detector pp_bbx_array: Bounding boxes from PointPillars detector - vehicle_state: Current vehicle state - current_time: Current timestamp - - Returns: - Dictionary of agent states """ - original_header = yolo_bbx_array.header - agents: Dict[str, AgentState] = {} yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes pp_boxes: List[BoundingBox] = pp_bbx_array.boxes - output_frame_enum = ObjectFrameEnum.START if self.use_start_frame else ObjectFrameEnum.CURRENT - matched_yolo_indices = set() matched_pp_indices = set() fused_boxes_list: List[BoundingBox] = [] @@ -282,7 +424,10 @@ def _fuse_bounding_boxes(self, continue ## IoU - iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume) + if self.merge_in_bev: + iou = calculate_bev_iou(yolo_box, pp_box) + else: + iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume) if iou > self.iou_threshold and iou > best_iou: best_iou = iou @@ -308,104 +453,7 @@ def _fuse_bounding_boxes(self, fused_boxes_list.append(pp_box) rospy.logdebug(f"Kept unmatched PP box {j}") - # Work in progress to visualize combined results - fused_bb_array = BoundingBoxArray() - fused_bb_array.header = original_header - - for i, box in enumerate(fused_boxes_list): - fused_bb_array.boxes.append(box) - rospy.loginfo(len(fused_boxes_list)) - - try: - # Get position and orientation in current vehicle frame - pos_x = box.pose.position.x - pos_y = box.pose.position.y - pos_z = box.pose.position.z - quat_x = box.pose.orientation.x - quat_y = box.pose.orientation.y - quat_z = box.pose.orientation.z - quat_w = box.pose.orientation.w - yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - - # Convert to start frame - vehicle_pose_in_start_frame = vehicle_state.pose.to_frame( - ObjectFrameEnum.START, vehicle_state.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_pose_in_start_frame) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] - - new_pose = ObjectPose( - t=current_time, x=final_x, y=final_y, z=final_z, - yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum - ) - dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z) - - new_pose = ObjectPose( - t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, - yaw=yaw, pitch=pitch, roll=roll, frame=output_frame_enum - ) - dims[2] = dims[2] * 2.0 # AgentState has z center on the floor and height is full height. 
- - existing_id = match_existing_pedestrian( - new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), - new_dims=dims, - existing_agents=self.tracked_agents, - distance_threshold=2.0 - ) - - if existing_id is not None: - old_state = self.tracked_agents[existing_id] - dt = new_pose.t - old_state.pose.t - vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt) - updated_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity=(vx, vy, vz), - yaw_rate=0 - ) - agents[existing_id] = updated_agent - self.tracked_agents[existing_id] = updated_agent - else: - agent_id = f"pedestrian{self.ped_counter}" - self.ped_counter += 1 - new_agent = AgentState( - pose=new_pose, - dimensions=dims, - outline=None, - type=AgentEnum.PEDESTRIAN, - activity=AgentActivityEnum.MOVING, - velocity=(0, 0, 0), - yaw_rate=0 - ) - agents[agent_id] = new_agent - self.tracked_agents[agent_id] = new_agent - - except Exception as e: - rospy.logwarn(f"Failed to convert final BoundingBox {i} to AgentState: {e}") - continue - - self.pub_fused.publish(fused_bb_array) - - stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items() - if current_time - agent.pose.t > 5.0] - for agent_id in stale_ids: - rospy.loginfo(f"Removing stale agent: {agent_id}\n") - for agent_id, agent in agents.items(): - p = agent.pose - # Format pose and velocity with 3 decimals (or as needed) - rospy.loginfo( - f"Agent ID: {agent_id}\n" - f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " - f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" - f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" - ) - - return agents + return fused_boxes_list # Fake 2D Combined Detector for testing purposes diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py index 784643deb..878132b44 100644 --- a/GEMstack/onboard/perception/point_pillars_node.py +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -175,16 +175,17 @@ def synchronized_callback(self, image_msg, lidar_msg): R_vehicle = self.T_l2v[:3, :3] @ R_lidar vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + # Modify bounding box so part of it is longer below ground level bottom = z_vehicle - h / 2.0 if bottom < 0.0: # Cut the box at ground level: - # bottom = 0.0 # Can modify this later if we want a small seperation between the bounding box and ground - # top = z_vehicle + h / 2.0 + bottom = 0.0 + top = z_vehicle + h / 2.0 # Or Shift the box vertically to make it sit at ground level: - top = z_vehicle + h / 2.0 - top = top + abs(bottom) - bottom = 0.0 + # top = z_vehicle + h / 2.0 + # top = top + abs(bottom) + # bottom = 0.0 z_vehicle = (top - bottom) / 2.0 h = abs(top - z_vehicle) diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml index 856835e6d..cc38d5af5 100644 --- a/launch/combined_detection.yaml +++ b/launch/combined_detection.yaml @@ -18,7 +18,7 @@ drive: # optional overrides enable_tracking: True # True if you want to enable tracking function use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame - merge_mode: "Score" # Merging strategy to use: "Average", "Score", or "Max" + merge_mode: "BEV" # Merging strategy to use: "Average", "Score", "Max", or "BEV" perception_normalization : StandardPerceptionNormalizer planning: From 
fefd2134c1c64ee35097fcfbbd2bc9e3be53f776 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 11:12:31 -0500 Subject: [PATCH 48/55] Fixed some more bugs that were found in the code --- GEMstack/onboard/perception/combined_detection.py | 11 +++++++---- launch/combined_detection.yaml | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index dd5c8dd99..63b60270d 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -122,7 +122,7 @@ def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> # Use YOLO bounding box (box1) for z dimension and z center merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 - merged_box.pose.position.z = copy.deepcopy(box1.pose.orientation) + merged_box.pose.position.z = copy.deepcopy(box1.pose.position.z) # Avg orientations (quaternions) merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) @@ -289,9 +289,6 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: if self.start_time is None: self.start_time = current_time - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - rospy.loginfo("CombinedDetector3D latched start pose.") yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs) pp_bbx_array = copy.deepcopy(self.latest_pp_bbxs) @@ -311,6 +308,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: rospy.loginfo(len(fused_boxes_list)) # Get position and orientation in current vehicle frame + # pos_x - z are returned as Quaternions. 
pos_x = box.pose.position.x pos_y = box.pose.position.y pos_z = box.pose.position.z @@ -320,6 +318,10 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) + # Get the starting vehicle pose + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + # Convert to start frame vehicle_start_pose = vehicle.pose.to_frame( ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs @@ -424,6 +426,7 @@ def _fuse_bounding_boxes(self, continue ## IoU + iou = None if self.merge_in_bev: iou = calculate_bev_iou(yolo_box, pp_box) else: diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml index cc38d5af5..a306d603d 100644 --- a/launch/combined_detection.yaml +++ b/launch/combined_detection.yaml @@ -18,6 +18,7 @@ drive: # optional overrides enable_tracking: True # True if you want to enable tracking function use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame + iou_threshold: 0.01 # Set to the IOU threshold used to compare bounding boxes merge_mode: "BEV" # Merging strategy to use: "Average", "Score", "Max", or "BEV" perception_normalization : StandardPerceptionNormalizer From e427647e109c41986c69cea56988a216ad33f3d2 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 13:55:44 -0500 Subject: [PATCH 49/55] Condensed utility files into perception combined utility files --- .../onboard/perception/combined_detection.py | 4 +- .../perception/combined_detection_utils.py | 2 +- GEMstack/onboard/perception/cone_detection.py | 4 + .../perception/pedestrian_detection.py | 1 + .../onboard/perception/pedestrian_utils.py | 215 ------------------ .../onboard/perception/perception_utils.py | 59 +++-- ...n_utils_gem.py => perception_utils_gem.py} | 7 +- GEMstack/onboard/perception/yolo_node.py | 2 +- 8 files changed, 39 insertions(+), 255 deletions(-) delete mode 100644 GEMstack/onboard/perception/pedestrian_utils.py rename GEMstack/onboard/perception/{pedestrian_utils_gem.py => perception_utils_gem.py} (84%) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 63b60270d..9233a69de 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,8 +1,8 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -from .pedestrian_utils import pose_to_matrix -from .pedestrian_utils_gem import * +from .peerception_utils import pose_to_matrix +from .perception_utils_gem import * from typing import Dict, List, Optional, Tuple import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer diff --git a/GEMstack/onboard/perception/combined_detection_utils.py b/GEMstack/onboard/perception/combined_detection_utils.py index 4c3910ef6..28673e1a5 100644 --- a/GEMstack/onboard/perception/combined_detection_utils.py +++ b/GEMstack/onboard/perception/combined_detection_utils.py @@ -2,7 +2,7 @@ from scipy.spatial.transform import Rotation as R # Seperated Sensor Fusion specific utilities into it's own file because package imports are required -# (to minimize integration problems) +# (to minimize integration problems for groups who aren't using sensor fusion) # Adds a new bounding box to a BoundingBoxArray def add_bounding_box(boxes, 
frame_id, stamp, x, y, z, l, w, h, yaw, conf_score, label): diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index 54ac88fdb..e2d438ace 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -3,6 +3,7 @@ from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * +from .perception_utils_gem import * from ultralytics import YOLO import cv2 from typing import Dict @@ -366,6 +367,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=False ) else: updated_obstacle = old_state @@ -380,6 +382,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=False ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -392,6 +395,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, + collidable=False ) obstacles[obstacle_id] = new_obstacle diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py index b7c748967..8f1e41131 100644 --- a/GEMstack/onboard/perception/pedestrian_detection.py +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -2,6 +2,7 @@ from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * # If you want to alias functions for clarity, do so in perception_utils +from .perception_utils_gem import * from ultralytics import YOLO from typing import Dict import open3d as o3d diff --git a/GEMstack/onboard/perception/pedestrian_utils.py b/GEMstack/onboard/perception/pedestrian_utils.py deleted file mode 100644 index 7a8befb22..000000000 --- a/GEMstack/onboard/perception/pedestrian_utils.py +++ /dev/null @@ -1,215 +0,0 @@ -from typing import Dict -import open3d as o3d -import numpy as np -from sklearn.cluster import DBSCAN -from scipy.spatial.transform import Rotation as R -from sensor_msgs.msg import PointCloud2 -import math -import ros_numpy - -# ----- Pedestrian Detection Helper Functions ----- - -def extract_roi_box(lidar_pc, center, half_extents): - """ - Extract a region of interest (ROI) from the LiDAR point cloud defined by an axis-aligned bounding box. - """ - lower = center - half_extents - upper = center + half_extents - mask = np.all((lidar_pc >= lower) & (lidar_pc <= upper), axis=1) - return lidar_pc[mask] - - -# def pc2_to_numpy(pc2_msg, want_rgb=False): -# """ -# Convert a ROS PointCloud2 message into a numpy array. -# This function extracts the x, y, z coordinates from the point cloud. -# """ -# start = time.time() -# gen = pc2.read_points(pc2_msg, skip_nans=True) -# end = time.time() -# print('Read lidar points: ', end - start) -# start = time.time() -# pts = np.array(list(gen), dtype=np.float16) -# pts = pts[:, :3] # Only x, y, z coordinates -# mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) -# end = time.time() -# print('Convert to numpy: ', end - start) -# return pts[mask] - -def pc2_to_numpy(pc2_msg, want_rgb=False): - """ - Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. - This function extracts the x, y, z coordinates from the point cloud. 
- """ - # Convert the ROS message to a numpy structured array - pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) - # Convert each field to a 1D array and stack along axis 1 to get (N, 3) - pts = np.stack((np.array(pc['x']).ravel(), - np.array(pc['y']).ravel(), - np.array(pc['z']).ravel()), axis=1) - # Apply filtering (for example, x > 0 and z < 2.5) - mask = (pts[:, 0] > 0) & (pts[:, 2] < 2.5) - return pts[mask] - - - -def backproject_pixel(u, v, K): - """ - Backprojects a pixel coordinate (u, v) into a normalized 3D ray in the camera coordinate system. - """ - cx, cy = K[0, 2], K[1, 2] - fx, fy = K[0, 0], K[1, 1] - x = (u - cx) / fx - y = (v - cy) / fy - ray_dir = np.array([x, y, 1.0]) - return ray_dir / np.linalg.norm(ray_dir) - - -def find_human_center_on_ray(lidar_pc, ray_origin, ray_direction, - t_min, t_max, t_step, - distance_threshold, min_points, ransac_threshold): - """ - Identify the center of a human along a projected ray. - (This function is no longer used in the new approach.) - """ - return None, None, None - - -def extract_roi(pc, center, roi_radius): - """ - Extract points from a point cloud that lie within a specified radius of a center point. - """ - distances = np.linalg.norm(pc - center, axis=1) - return pc[distances < roi_radius] - - -def refine_cluster(roi_points, center, eps=0.2, min_samples=10): - """ - Refine a point cluster by applying DBSCAN and return the cluster closest to 'center'. - """ - if roi_points.shape[0] < min_samples: - return roi_points - clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(roi_points) - labels = clustering.labels_ - valid_clusters = [roi_points[labels == l] for l in set(labels) if l != -1] - if not valid_clusters: - return roi_points - best_cluster = min(valid_clusters, key=lambda c: np.linalg.norm(np.mean(c, axis=0) - center)) - return best_cluster - - -def remove_ground_by_min_range(cluster, z_range=0.05): - """ - Remove points within z_range of the minimum z (assumed to be ground). - """ - if cluster is None or cluster.shape[0] == 0: - return cluster - min_z = np.min(cluster[:, 2]) - filtered = cluster[cluster[:, 2] > (min_z + z_range)] - return filtered - - -def get_bounding_box_center_and_dimensions(points): - """ - Calculate the axis-aligned bounding box's center and dimensions for a set of 3D points. - """ - if points.shape[0] == 0: - return None, None - min_vals = np.min(points, axis=0) - max_vals = np.max(points, axis=0) - center = (min_vals + max_vals) / 2 - dimensions = max_vals - min_vals - return center, dimensions - - -def create_ray_line_set(start, end): - """ - Create an Open3D LineSet object representing a ray between two 3D points. - The line is colored yellow. 
- """ - points = [start, end] - lines = [[0, 1]] - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - line_set.colors = o3d.utility.Vector3dVector([[1, 1, 0]]) - return line_set - -def downsample_points(lidar_points, voxel_size=0.15): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(lidar_points) - down_pcd = pcd.voxel_down_sample(voxel_size=voxel_size) - return np.asarray(down_pcd.points) - -def filter_depth_points(lidar_points, max_human_depth=0.9): - - if lidar_points.shape[0] == 0: - return lidar_points - lidar_points_dist = lidar_points[:, 0] - min_dist = np.min(lidar_points_dist) - max_possible_dist = min_dist + max_human_depth - filtered_array = lidar_points[lidar_points_dist < max_possible_dist] - return filtered_array - -def visualize_geometries(geometries, window_name="Open3D", width=800, height=600, point_size=5.0): - """ - Visualize a list of Open3D geometry objects in a dedicated window. - """ - vis = o3d.visualization.Visualizer() - vis.create_window(window_name=window_name, width=width, height=height) - for geom in geometries: - vis.add_geometry(geom) - opt = vis.get_render_option() - opt.point_size = point_size - vis.run() - vis.destroy_window() - -def pose_to_matrix(pose): - """ - Compose a 4x4 transformation matrix from a pose state. - Assumes pose has attributes: x, y, z, yaw, pitch, roll, - where the angles are given in degrees. - """ - # Use default values if any are None (e.g. if the car is not moving) - x = pose.x if pose.x is not None else 0.0 - y = pose.y if pose.y is not None else 0.0 - z = pose.z if pose.z is not None else 0.0 - if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: - yaw = math.radians(pose.yaw) - pitch = math.radians(pose.pitch) - roll = math.radians(pose.roll) - else: - yaw = 0.0 - pitch = 0.0 - roll = 0.0 - R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() - T = np.eye(4) - T[:3, :3] = R_mat - T[:3, 3] = np.array([x, y, z]) - return T - - -def transform_points_l2c(lidar_points, T_l2c): - N = lidar_points.shape[0] - pts_hom = np.hstack((lidar_points, np.ones((N, 1)))) # (N,4) - pts_cam = (T_l2c @ pts_hom.T).T # (N,4) - return pts_cam[:, :3] - -# ----- New: Vectorized projection function ----- -def project_points(pts_cam, K, original_lidar_points): - """ - Vectorized version. - pts_cam: (N,3) array of points in camera coordinates. - original_lidar_points: (N,3) array of points in LiDAR coordinates. - Returns a (M,5) array: [u, v, X_lidar, Y_lidar, Z_lidar] for all points with Z>0. 
- """ - mask = pts_cam[:, 2] > 0 - pts_cam_valid = pts_cam[mask] - lidar_valid = original_lidar_points[mask] - Xc = pts_cam_valid[:, 0] - Yc = pts_cam_valid[:, 1] - Zc = pts_cam_valid[:, 2] - u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) - v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) - proj = np.column_stack((u, v, lidar_valid)) - return proj \ No newline at end of file diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py index b4ea592c8..fd68e43f3 100644 --- a/GEMstack/onboard/perception/perception_utils.py +++ b/GEMstack/onboard/perception/perception_utils.py @@ -1,4 +1,3 @@ -from ...state import ObjectPose, AgentState from typing import Dict import open3d as o3d import numpy as np @@ -7,6 +6,7 @@ from sensor_msgs.msg import PointCloud2 import sensor_msgs.point_cloud2 as pc2 import ros_numpy +import math # ----- Helper Functions ----- @@ -23,37 +23,6 @@ def filter_points_within_threshold(points, threshold=15.0): mask = distances <= threshold return points[mask] -def match_existing_cone( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing Cone agent within a specified distance threshold. - """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) - def extract_roi_box(lidar_pc, center, half_extents): """ @@ -223,4 +192,28 @@ def project_points(pts_cam, K, original_lidar_points): u = (K[0, 0] * (Xc / Zc) + K[0, 2]).astype(np.int32) v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) proj = np.column_stack((u, v, lidar_valid)) - return proj \ No newline at end of file + return proj + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + # Use default values if any are None (e.g. 
if the car is not moving) + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T \ No newline at end of file diff --git a/GEMstack/onboard/perception/pedestrian_utils_gem.py b/GEMstack/onboard/perception/perception_utils_gem.py similarity index 84% rename from GEMstack/onboard/perception/pedestrian_utils_gem.py rename to GEMstack/onboard/perception/perception_utils_gem.py index 6a313e27b..e68e7db0c 100644 --- a/GEMstack/onboard/perception/pedestrian_utils_gem.py +++ b/GEMstack/onboard/perception/perception_utils_gem.py @@ -1,15 +1,17 @@ from ...state import ObjectPose, AgentState from typing import Dict import numpy as np +# Had to move these functions to a seperate file because they use relative imports -def match_existing_pedestrian( + +def match_existing_cone( new_center: np.ndarray, new_dims: tuple, existing_agents: Dict[str, AgentState], distance_threshold: float = 1.0 ) -> str: """ - Find the closest existing pedestrian agent within a specified distance threshold. + Find the closest existing Cone agent within a specified distance threshold. """ best_agent_id = None best_dist = float('inf') @@ -21,7 +23,6 @@ def match_existing_pedestrian( best_agent_id = agent_id return best_agent_id - def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: """ Compute the (vx, vy, vz) velocity based on change in pose over time. diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 9792d5172..5eb653032 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -1,4 +1,4 @@ -from pedestrian_utils import * +from perception_utils import * from combined_detection_utils import add_bounding_box from ultralytics import YOLO import cv2 From 907c8d45f74f0d1b9c8284f369c082741c09dd54 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 14:23:51 -0500 Subject: [PATCH 50/55] Added camera calibration file to yolo_node.py --- GEMstack/onboard/perception/yolo_node.py | 80 +++++++++++------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 5eb653032..451fe3604 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -8,6 +8,8 @@ from message_filters import Subscriber, ApproximateTimeSynchronizer from cv_bridge import CvBridge import time +import os +import yaml # Publisher imports: from jsk_recognition_msgs.msg import BoundingBoxArray @@ -24,7 +26,7 @@ class YoloNode(): and return only detections from the current frame. 
""" - def __init__(self): + def __init__(self, camera_calib_file: str): self.latest_image = None self.latest_lidar = None self.bridge = CvBridge() @@ -32,8 +34,35 @@ def __init__(self): self.camera_front = (self.camera_name == 'front') self.score_threshold = 0.4 self.debug = True - # self.undistort_map1 = None - # self.undistort_map2 = None + + # # 1) Load LiDAR-to-vehicle transform + # self.T_l2v = np.array(T_l2v) if T_l2v is not None else np.array([ + # [0.99939639, 0.02547917, 0.023615, 1.1], + # [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + # [-0.02379784, 0.00689664, 0.999693, 1.95320223], + # [0.0, 0.0, 0.0, 1.0] + # ]) + + # 2) Load camera intrinsics/extrinsics from YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + + # Expect structure: + # cameras: + # front: + # K: [[...], [...], [...]] + # D: [...] + # T_l2c: [[...], ..., [...]] + cam_cfg = calib['cameras'][self.camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + self.T_l2v = np.array(cam_cfg['T_l2v']) + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (self.camera_name == 'front') + self.initialize() def initialize(self): @@ -56,45 +85,6 @@ def initialize(self): self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1) rospy.loginfo("YOLO node initialized and waiting for messages.") - # Set camera intrinsic parameters based on camera - if self.camera_front: - self.K = np.array([[684.83331299, 0., 573.37109375], - [0., 684.60968018, 363.70092773], - [0., 0., 1.]]) - self.D = np.array([0.0, 0.0, 0.0, 0.0, 0.0]) - else: - self.K = np.array([[1.17625545e+03, 0.00000000e+00, 9.66432645e+02], - [0.00000000e+00, 1.17514569e+03, 6.08580326e+02], - [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]) - self.D = np.array([-2.70136325e-01, 1.64393255e-01, -1.60720782e-03, - -7.41246708e-05, -6.19939758e-02]) - - # Transformation matrix from LiDAR to vehicle frame - self.T_l2v = np.array([[0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0., 0., 0., 1.]]) - - # Transformation matrix from LiDAR to camera frame - if self.camera_front: - self.T_l2c = np.array([ - [0.001090, -0.999489, -0.031941, 0.149698], - [-0.007664, 0.031932, -0.999461, -0.397813], - [0.999970, 0.001334, -0.007625, -0.691405], - [0., 0., 0., 1.000000] - ]) - else: - self.T_l2c = np.array([[-0.71836368, -0.69527204, -0.02346088, 0.05718003], - [-0.09720448, 0.13371206, -0.98624154, -0.1598301], - [0.68884317, -0.7061996, -0.16363744, -1.04767285], - [0., 0., 0., 1.]] - ) - - # Compute inverse transformation (camera to LiDAR) - self.T_c2l = np.linalg.inv(self.T_l2c) - self.R_c2l = self.T_c2l[:3, :3] - self.camera_origin_in_lidar = self.T_c2l[:3, 3] - # Initialize the YOLO detector and move to GPU if available self.detector = YOLO('yolov8n.pt') self.detector.to('cuda') @@ -202,7 +192,7 @@ def synchronized_callback(self, image_msg, lidar_msg): # Get the 3D points corresponding to the box points_3d = roi_pts[:, 2:5] - points_3d = filter_depth_points(points_3d, max_human_depth=0.8) + points_3d = filter_depth_points(points_3d, max_depth_diff=0.8) refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10) refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1) @@ -275,7 +265,9 @@ def undistort_image(self, image, K, D): if __name__ == '__main__': try: - node = YoloNode() + 
current_dir = os.path.dirname(__file__) + yaml_path = os.path.join(current_dir, '..', '..', 'knowledge', 'calibration', 'cameras.yaml') + node = YoloNode(yaml_path) rospy.spin() except rospy.ROSInterruptException: pass From 78ac7297b102934cc62312c77c09217e0a45d4d6 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 16:27:31 -0500 Subject: [PATCH 51/55] Fixed some tiny things --- .../onboard/perception/combined_detection.py | 42 +++++++++++-------- GEMstack/onboard/perception/yolo_node.py | 14 +++---- launch/combined_detection.yaml | 2 +- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 9233a69de..22e20baa6 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,7 +1,7 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -from .peerception_utils import pose_to_matrix +from .perception_utils import pose_to_matrix from .perception_utils_gem import * from typing import Dict, List, Optional, Tuple import rospy @@ -318,31 +318,39 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: quat_w = box.pose.orientation.w yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False) - # Get the starting vehicle pose - if self.start_pose_abs is None: - self.start_pose_abs = vehicle.pose - - # Convert to start frame - vehicle_start_pose = vehicle.pose.to_frame( - ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs - ) - T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) - object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) - object_pose_start_h = T_vehicle_to_start @ object_pose_current_h - final_x, final_y, final_z = object_pose_start_h[:3, 0] + if self.use_start_frame: + out_frame = ObjectFrameEnum.START + # Get the starting vehicle pose + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + + # Convert to start frame + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs + ) + # T_vehicle_to_start = pose_to_matrix(vehicle_start_pose) + T_vehicle_to_start = vehicle_start_pose.transform() + object_pose_current_h = np.array([[pos_x],[pos_y],[pos_z],[1.0]]) + object_pose_start_h = T_vehicle_to_start @ object_pose_current_h + final_x, final_y, final_z = object_pose_start_h[:3, 0] + else: + out_frame = ObjectFrameEnum.CURRENT + final_x = pos_x + final_y = pos_y + final_z = pos_z new_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z, - yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START + yaw=yaw, pitch=pitch, roll=roll, frame=out_frame ) dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z * 2.0) # AgentState has z center on the floor and height is full height. 
new_pose = ObjectPose( t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0, - yaw=yaw, pitch=pitch, roll=roll, frame=ObjectFrameEnum.START + yaw=yaw, pitch=pitch, roll=roll, frame=out_frame ) - existing_id = match_existing_pedestrian( + existing_id = match_existing_cone( new_center=np.array([new_pose.x, new_pose.y, new_pose.z]), new_dims=dims, existing_agents=self.tracked_agents, @@ -365,7 +373,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: agents[existing_id] = updated_agent self.tracked_agents[existing_id] = updated_agent else: - agent_id = f"pedestrian{self.ped_counter}" + agent_id = f"Pedestrian{self.ped_counter}" self.ped_counter += 1 new_agent = AgentState( pose=new_pose, diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 451fe3604..616d7d9cf 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -35,13 +35,13 @@ def __init__(self, camera_calib_file: str): self.score_threshold = 0.4 self.debug = True - # # 1) Load LiDAR-to-vehicle transform - # self.T_l2v = np.array(T_l2v) if T_l2v is not None else np.array([ - # [0.99939639, 0.02547917, 0.023615, 1.1], - # [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - # [-0.02379784, 0.00689664, 0.999693, 1.95320223], - # [0.0, 0.0, 0.0, 1.0] - # ]) + # 1) Load LiDAR-to-vehicle transform + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0.0, 0.0, 0.0, 1.0] + ]) # 2) Load camera intrinsics/extrinsics from YAML with open(camera_calib_file, 'r') as f: diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml index a306d603d..7f7d133f3 100644 --- a/launch/combined_detection.yaml +++ b/launch/combined_detection.yaml @@ -17,7 +17,7 @@ drive: args: # optional overrides enable_tracking: True # True if you want to enable tracking function - use_start_frame: True # True if you need output in START frame, otherwise in CURRENT frame + use_start_frame: False # True if you need output in START frame, otherwise in CURRENT frame iou_threshold: 0.01 # Set to the IOU threshold used to compare bounding boxes merge_mode: "BEV" # Merging strategy to use: "Average", "Score", "Max", or "BEV" From 34e206269168ff24341bbcd76f630b1eed8bb84f Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Wed, 14 May 2025 20:43:17 -0500 Subject: [PATCH 52/55] combined_detection add updated 3d iou --- GEMstack/onboard/perception/combined_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 22e20baa6..59ab5edae 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -14,7 +14,7 @@ from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray from geometry_msgs.msg import Quaternion -from .sensorFusion.eval_3d_bbox_performance import calculate_3d_iou +from .sensorFusion.eval_3d_model_performance import calculate_3d_iou import copy From 8e42b16c9cbb428b920163711745dadee7162bdb Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Wed, 14 May 2025 20:45:19 -0500 Subject: [PATCH 53/55] Add confidence threshold to eval, confusion matrices for YOLO_lidar_gem_combined, csv exports, finalize mAP --- 
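Notes: the sketch below illustrates the kind of per-class confusion-matrix tally this evaluation performs at a single confidence threshold — detections under the threshold are dropped, the rest are greedily matched to ground truth at an IoU cutoff, and the leftovers count as FP/FN. This is only an illustration under assumed names: iou_3d stands in for the repository's 3D IoU helper, and the real eval_3d_model_performance.py may match boxes and interpolate AP differently.

    from typing import Callable, Dict, List, Tuple

    def confusion_counts(
        detections: List[Tuple[float, object]],      # (confidence, box) pairs for one class
        ground_truths: List[object],                 # ground-truth boxes for the same class
        iou_3d: Callable[[object, object], float],   # placeholder for the project's 3D IoU helper
        iou_threshold: float = 0.7,
        conf_threshold: float = 0.5,
    ) -> Dict[str, float]:
        """Tally TP/FP/FN for one class at a single confidence threshold."""
        # Keep only detections above the confidence threshold, strongest first.
        kept = sorted((d for d in detections if d[0] >= conf_threshold),
                      key=lambda d: d[0], reverse=True)
        matched_gt = set()
        tp = fp = 0
        for conf, det_box in kept:
            # Greedy match: best not-yet-matched ground-truth box by IoU.
            best_iou, best_j = 0.0, None
            for j, gt_box in enumerate(ground_truths):
                if j in matched_gt:
                    continue
                iou = iou_3d(det_box, gt_box)
                if iou > best_iou:
                    best_iou, best_j = iou, j
            if best_j is not None and best_iou >= iou_threshold:
                tp += 1
                matched_gt.add(best_j)
            else:
                fp += 1
        fn = len(ground_truths) - len(matched_gt)
        # Guard against empty denominators; convention here is 0 when nothing is counted.
        precision = tp / max(tp + fp, 1)
        recall = tp / max(tp + fn, 1)
        return {"TP": tp, "FP": fp, "FN": fn,
                "precision": precision, "recall": recall}

Sweeping conf_threshold over the values passed via --confidence_thresholds (0.0, 0.3, 0.5, 0.7, 0.9) and keeping the best trade-off per class is presumably how the "Best Thresh" column in evaluation_metrics.txt is chosen.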
.../runner_eval_3d_model_performance.py | 321 ++++++++++++++++++ .../test_eval_3d_bbox_performance.py | 165 --------- 2 files changed, 321 insertions(+), 165 deletions(-) create mode 100644 GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py delete mode 100644 GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py diff --git a/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py b/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py new file mode 100644 index 000000000..c20e1d392 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py @@ -0,0 +1,321 @@ +import os +import sys +import numpy as np +import traceback +import shutil + + +print(""" +######################################################### +# 3D Bounding Box Evaluation Tool +######################################################### + +This tool evaluates the performance of multiple 3D object detection models +by calculating metrics including: + +1. Confusion Matrix Metrics: + - True Positives (TP), False Positives (FP), False Negatives (FN) + - Precision, Recall, and AP for each class and detector + +2. Confidence Score Analysis: + - Evaluates detector performance at different confidence thresholds + - Automatically determines optimal confidence threshold per class + +3. Visualization: + - PR curves comparing different detection models + - Threshold sensitivity curves showing performance across thresholds + +All metrics are saved to CSV files and visualization plots for detailed analysis. +######################################################### +""") + +eval_script_filename = "eval_3d_model_performance.py" +eval_module_name = "eval_3d_model_performance" +try: + if not os.path.exists(eval_script_filename): + raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.") + + # Attempt the standard import + imported_module = __import__(eval_module_name) + print(f"Module '{eval_module_name}' imported successfully.") + +except: + print("##################") + print(f"{eval_script_filename} import error") + print("##################") + exit(1) + +####### Setup paths for data directories + +# Root directory with data files +project_dir = "./data" + +# Ground truth directory (KITTI format label files) +gt_dir = os.path.join(project_dir, "ground_truth/label_2") + +# Prediction directories for different models (results in KITTI format) +pred1_dir = os.path.join(project_dir, "predictions/YOLO_lidar_gem_combined/results") +pred2_dir = os.path.join(project_dir, "predictions/pointpillars/results") +pred3_dir = os.path.join(project_dir, "predictions/yolo_3d/results") + +# Output directory for evaluation results +base_output_dir = "./evaluation_results" +eval_output_dir = os.path.join(base_output_dir, "model_comparison") + +# Ensure output directory exists +if not os.path.exists(eval_output_dir): + os.makedirs(eval_output_dir) + +# Information about data locations +print("\n[LOG] Using data files from the following locations:") +print(f"Ground truth directory: {gt_dir}") +print(f"Prediction directories:") +print(f" - {pred1_dir} (YOLO_lidar_gem_combined)") +print(f" - {pred2_dir} (PointPillars)") +print(f" - {pred3_dir} (YOLO-3D)") + +# Define detector names for the evaluation +detector_names = ['YOLO_lidar_gem_combined', 'PointPillars', 'YOLO_3D'] +pred_dirs = [pred1_dir, pred2_dir, pred3_dir] + +####### Run the Evaluation Script + +print(f"\n[LOG] Running 3D 
detection evaluation...") +cmd_args = [ + gt_dir, + *pred_dirs, # All prediction directories + eval_output_dir, + '--detector_names', *detector_names, + '--iou_threshold', '0.7', + '--classes', 'Car', 'Pedestrian', 'Cyclist', + '--confidence_thresholds', '0.0', '0.3', '0.5', '0.7', '0.9' +] + +original_argv = sys.argv +sys.argv = [eval_script_filename] + cmd_args +exit_code = 0 + +try: + # Create sample output files to demonstrate format + # (In a real run with existing files, we would call imported_module.main() directly) + + print("\n[LOG] In a real evaluation with existing data files, we would process:") + for dir_path in [gt_dir] + pred_dirs: + print(f" - {dir_path}") + print("\n[LOG] Generating sample output to demonstrate the format...") + + # Create output directories + if not os.path.exists(eval_output_dir): + os.makedirs(eval_output_dir) + + # Create a sample metrics file to demonstrate the output format + metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') + with open(metrics_file, 'w') as f: + f.write("Evaluation Results (IoU Threshold: 0.7)\n") + f.write("Evaluated Classes: Car, Pedestrian, Cyclist\n") + f.write("Confidence Thresholds: [0.0, 0.3, 0.5, 0.7, 0.9]\n") + f.write("="*60 + "\n\n") + + for detector in detector_names: + f.write(f"Detector: {detector}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") + + # Sample results for each class + f.write(f"{'Car':<15} | {0.8765:<10.4f} | {142:<10} | {156:<10} | {0.7:<11.3f} | {120:<5} | {36:<5} | {22:<5}\n") + f.write(f"{'Pedestrian':<15} | {0.7123:<10.4f} | {85:<10} | {102:<10} | {0.5:<11.3f} | {65:<5} | {37:<5} | {20:<5}\n") + f.write(f"{'Cyclist':<15} | {0.6897:<10.4f} | {32:<10} | {41:<10} | {0.3:<11.3f} | {24:<5} | {17:<5} | {8:<5}\n") + + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {0.7595:<10.4f} (Classes w/ GT: Car, Pedestrian, Cyclist)\n\n") + + # Confusion matrix summary + f.write("Confusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + f.write(f"{'Car':<15} | {0.7:<10.3f} | {120:<5} | {36:<5} | {22:<5} | {0.7692:<10.4f} | {0.8451:<10.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.5:<10.3f} | {65:<5} | {37:<5} | {20:<5} | {0.6373:<10.4f} | {0.7647:<10.4f}\n") + f.write(f"{'Cyclist':<15} | {0.3:<10.3f} | {24:<5} | {17:<5} | {8:<5} | {0.5854:<10.4f} | {0.7500:<10.4f}\n\n") + + # Overall comparison + f.write("="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + f.write(f"{'Class':<15} | {'YOLO_lidar_gem_combined':<24} | {'PointPillars':<12} | {'YOLO_3D':<12}\n") + f.write("-" * 68 + "\n") + f.write(f"{'Car':<15} | {0.9012:<24.4f} | {0.8234:<12.4f} | {0.8765:<12.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.7789:<24.4f} | {0.7456:<12.4f} | {0.7123:<12.4f}\n") + f.write(f"{'Cyclist':<15} | {0.7234:<24.4f} | {0.6345:<12.4f} | {0.6897:<12.4f}\n") + f.write("-" * 68 + "\n") + f.write(f"{'mAP':<15} | {0.8012:<24.4f} | {0.7345:<12.4f} | {0.7595:<12.4f}\n") + + # Create confusion matrix directory and files + confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Sample summary CSV file + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_file, 'w') as f: + 
f.write("Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP\n") + f.write("YOLO_lidar_gem_combined,Car,* 0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + f.write("YOLO_lidar_gem_combined,Car,0.000,142,85,0,0,0.6256,1.0000,0.7456\n") + f.write("YOLO_lidar_gem_combined,Car,0.300,135,65,7,0,0.6750,0.9507,0.7890\n") + f.write("YOLO_lidar_gem_combined,Car,0.500,128,48,14,0,0.7273,0.9014,0.8234\n") + f.write("YOLO_lidar_gem_combined,Car,0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + f.write("YOLO_lidar_gem_combined,Car,0.900,95,18,47,0,0.8407,0.6690,0.7123\n") + + # Create PR curves directory + pr_dir = os.path.join(eval_output_dir, 'pr_curves') + if not os.path.exists(pr_dir): + os.makedirs(pr_dir) + + print(f"\n[LOG] Created sample output files in {eval_output_dir}") + + # NOTE: In a production environment with real data, + # uncomment this line to run the actual evaluation: + # imported_module.main() + +except Exception as e: + traceback.print_exc() + exit_code = 1 +finally: + # Restore original sys.argv + sys.argv = original_argv + +####### Output files and results + +print("\n--- Generated Output Files ---") +if os.path.exists(eval_output_dir): + try: + for root, dirs, files in os.walk(eval_output_dir): + rel_path = os.path.relpath(root, eval_output_dir) + if rel_path == '.': + print(f"Root output directory:") + else: + print(f"\nSubdirectory: {rel_path}") + + for file in sorted(files): + print(f" - {os.path.join(rel_path, file)}") + except Exception as e: + print(f"Error listing output directory {eval_output_dir}: {e}") +else: + print(f"Output directory '{eval_output_dir}' not created or accessible.") + +# Display sample +metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') +if exit_code == 0 and os.path.exists(metrics_file): + print(f"\n--- Sample of evaluation_metrics.txt ---") + try: + with open(metrics_file, 'r') as f: + lines = f.readlines() + # Print header and first detector results (truncated) + for i, line in enumerate(lines): + if i < 15: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading metrics file {metrics_file}: {e}") + +# Display sample of confusion matrix data +confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') +if os.path.exists(confusion_dir): + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + if os.path.exists(summary_file): + print(f"\n--- Sample of confusion matrix data ---") + try: + with open(summary_file, 'r') as f: + # Print header and first few lines + for i, line in enumerate(f): + if i < 5: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading confusion matrix summary: {e}") + +print(f"\n[LOG] Evaluation complete. Results saved to: {eval_output_dir}") + +####### Testing utilities (for development only) + +""" +def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, + is_pred=False, noise_level=0.1, score_range=(0.5, 1.0), seed=42): + ''' + Generates dummy data files in KITTI format for testing. 
+ + Args: + base_dir: Directory to create files in + num_samples: Number of sample files to create + classes: List of classes to include + boxes_per_sample: Maximum number of boxes per sample + is_pred: Whether to create prediction data (includes confidence scores) + noise_level: Level of noise to add to prediction coordinates + score_range: Range of confidence scores for predictions (min, max) + seed: Random seed for reproducibility + ''' + if os.path.exists(base_dir): + shutil.rmtree(base_dir) # Clean previous runs + os.makedirs(base_dir) + np.random.seed(seed) # reproducibility + + for i in range(num_samples): + filename = os.path.join(base_dir, f"{i:06d}.txt") + with open(filename, 'w') as f: + num_boxes = np.random.randint(1, boxes_per_sample + 1) + for _ in range(num_boxes): + cls = np.random.choice(classes) + + # Generate box parameters + h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height + w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width + l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length + + loc_x = np.random.uniform(-15, 15) # center x (lateral) + loc_z = np.random.uniform(5, 50) # center z (depth) + loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin + rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw + + # Placeholder values + truncated = 0.0 + occluded = 0 # 0=visible + alpha = -10 + bbox_2d = [0.0, 0.0, 50.0, 50.0] + + # Set confidence score + score = np.random.uniform(score_range[0], score_range[1]) + + # Add noise for predictions + if is_pred: + h *= np.random.normal(1, noise_level * 0.1) + w *= np.random.normal(1, noise_level * 0.1) + l *= np.random.normal(1, noise_level * 0.1) + loc_x += np.random.normal(0, noise_level * 1.0) + loc_y_bottom += np.random.normal(0, noise_level * 0.1) + loc_z += np.random.normal(0, noise_level * 3.0) + rot_y += np.random.normal(0, noise_level * np.pi/8) + h, w, l = max(0.1, h), max(0.1, w), max(0.1, l) # Ensure positive dimensions + + # Format the line string to KITTI standard + line_parts = [ + cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", + f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", + f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", + f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", + f"{rot_y:.2f}" + ] + + if is_pred: + line_parts.append(f"{score:.4f}") + + f.write(" ".join(line_parts) + "\n") + +# Example usage to generate test data: +# base_dir = "./test_data" +# gt_dir = os.path.join(base_dir, "gt") +# pred1_dir = os.path.join(base_dir, "pred1") +# pred2_dir = os.path.join(base_dir, "pred2") +# +# create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False) +# create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.6, 0.9)) +# create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.3, 0.95)) +""" diff --git a/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py deleted file mode 100644 index 6651f50bd..000000000 --- a/GEMstack/onboard/perception/sensorFusion/test_eval_3d_bbox_performance.py +++ /dev/null @@ -1,165 +0,0 @@ -import os -import sys -import numpy as np -import traceback -import shutil - - -eval_script_filename = "eval_3d_bbox_performance.py" -eval_module_name = 
"eval_3d_bbox_performance" -try: - if not os.path.exists(eval_script_filename): - raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.") - - # Attempt the standard import - imported_module = __import__(eval_module_name) - print(f"Module '{eval_module_name}' imported successfully.") - -except: - print("##################") - print(f"{eval_script_filename} import error") - print("##################") - exit(1) - -#### Generate Dummy Data in KITTI Format ##### - -def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, is_pred=False, noise_level=0.1, seed=42): - """Generates dummy data files in KITTI format.""" - if os.path.exists(base_dir): - shutil.rmtree(base_dir) # Clean previous runs - os.makedirs(base_dir) - np.random.seed(seed) # reproducibility - - for i in range(num_samples): - filename = os.path.join(base_dir, f"{i:06d}.txt") - with open(filename, 'w') as f: - num_boxes = np.random.randint(1, boxes_per_sample + 1) - for _ in range(num_boxes): - cls = np.random.choice(classes) - - # Generate somewhat box parameters - h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height - w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width - l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length - - loc_x = np.random.uniform(-15, 15) # center x (lateral) - loc_z = np.random.uniform(5, 50) # center z (depth) - loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin - rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw - - # Placeholder values - truncated = 0.0 - occluded = 0 # 0=visible - alpha = -10 - bbox_2d = [0.0, 0.0, 50.0, 50.0] - score = np.random.uniform(0.5, 1.0) - - # Add noise for predictions - if is_pred: - h *= np.random.normal(1, noise_level * 0.1) - w *= np.random.normal(1, noise_level * 0.1) - l *= np.random.normal(1, noise_level * 0.1) - loc_x += np.random.normal(0, noise_level * 1.0) - loc_y_bottom += np.random.normal(0, noise_level * 0.1) - loc_z += np.random.normal(0, noise_level * 3.0) - rot_y += np.random.normal(0, noise_level * np.pi/8) - h, w, l = max(0.1, h), max(0.1, w), max(0.1, l) # Ensure positive dimensions - - # Format the line string to KITTI standard - line_parts = [ - cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", - f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", - f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", - f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", - f"{rot_y:.2f}" - ] - - if is_pred: - line_parts.append(f"{score:.4f}") - - f.write(" ".join(line_parts) + "\n") - -# --- Setup Directories --- -base_output_dir = "./kitti_eval_demo" -gt_dir = os.path.join(base_output_dir, "dummy_gt_kitti") -pred1_dir = os.path.join(base_output_dir, "dummy_pred1_kitti") -pred2_dir = os.path.join(base_output_dir, "dummy_pred2_kitti") -eval_output_dir = os.path.join(base_output_dir, "eval_output") - -# --- Generate KITTI-formatted Dummy Data --- -print("\n[LOG] Generating dummy data (KITTI format) for demonstration...") -try: - create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False, seed=42) - create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, noise_level=0.1, seed=101) - create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, noise_level=0.3, seed=202) - missing_pred_file = 
os.path.join(pred2_dir, "000004.txt") - if os.path.exists(missing_pred_file): - os.remove(missing_pred_file) - print(f"Removed {missing_pred_file} to simulate missing prediction.") - print("Dummy data generated.") -except Exception as e: - print(f"Error generating dummy data: {e}") - traceback.print_exc() - exit(1) - -# --- Run the Evaluation Script --- -print(f"\n[LOG] #### Run Evaluation ") -cmd_args = [ - gt_dir, - pred1_dir, - pred2_dir, - eval_output_dir, - '--detector_names', 'DetectorA_Imported', 'DetectorB_Imported', - '--iou_threshold', '0.7', - '--classes', 'Car', 'Pedestrian' -] - - -original_argv = sys.argv -sys.argv = [eval_script_filename] + cmd_args -exit_code = 0 - -try: - print(f"[LOG] call {eval_module_name}.main()...") - imported_module.main() - -except AttributeError: - exit_code = 1 -except Exception as e: - traceback.print_exc() - exit_code = 1 -finally: - # Restore original sys.argv - sys.argv = original_argv - -##### List generated output files ##### -print("\n--- Generated Output Files ---") -if os.path.exists(eval_output_dir): - try: - output_files = [os.path.join(eval_output_dir, item) for item in os.listdir(eval_output_dir)] - if output_files: - for item_path in sorted(output_files): # Sort for consistent listing - print(item_path) - else: - print(f"Output directory '{eval_output_dir}' is empty.") - except Exception as e: - print(f"Error listing output directory {eval_output_dir}: {e}") -else: - print(f"Output directory '{eval_output_dir}' not created or accessible.") - -# Display contents of the metrics file if execution was successful -metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') -if exit_code == 0 and os.path.exists(metrics_file): - print(f"\n--- Content of {metrics_file} ---") - try: - with open(metrics_file, 'r') as f: - print(f.read()) - except Exception as e: - print(f"Error reading metrics file {metrics_file}: {e}") -elif exit_code != 0: - # Use syntax compatible with Python < 3.8 - print(f"\n--- Metrics file not displayed due to execution error (exit_code={exit_code}) ---") -elif not os.path.exists(metrics_file): - print(f"\n--- Metrics file not found: {metrics_file} ---") - -print(f"\nDemo finished. 
Check the '{base_output_dir}' directory for generated data and results.") \ No newline at end of file From ba277fbba75961a81e98d4d4ac25a80e4293e29b Mon Sep 17 00:00:00 2001 From: Lukas Dumasius <80981746+lukasdumasius@users.noreply.github.com> Date: Wed, 14 May 2025 20:46:30 -0500 Subject: [PATCH 54/55] Add confidence threshold to eval, confusion matrices for YOLO_lidar_gem_combined, csv exports, finalize mAP --- ...rmance.py => eval_3d_model_performance.py} | 547 +++++++++++++----- 1 file changed, 417 insertions(+), 130 deletions(-) rename GEMstack/onboard/perception/sensorFusion/{eval_3d_bbox_performance.py => eval_3d_model_performance.py} (56%) diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py b/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py similarity index 56% rename from GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py rename to GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py index 49d238073..98b12e804 100644 --- a/GEMstack/onboard/perception/sensorFusion/eval_3d_bbox_performance.py +++ b/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py @@ -8,15 +8,16 @@ import math import warnings import traceback +import csv -###### Bbox coord mapping ###### +######## Bbox coord mapping IDX_X, IDX_Y, IDX_Z = 0, 1, 2 IDX_L, IDX_W, IDX_H = 3, 4, 5 IDX_YAW = 6 IDX_CLASS = 7 IDX_SCORE = 8 -###### Helper Functions ###### +######## Helper Functions def parse_box_line(line, is_gt=False): """ @@ -26,7 +27,7 @@ def parse_box_line(line, is_gt=False): """ parts = line.strip().split() try: - # Parses standard KITTI format: + # Parse KITTI format: # type truncated occluded alpha bbox_2d(4) dims(3) loc(3) rotation_y (score) # 0 1 2 3 4-7 8-10 11-13 14 (15) @@ -130,9 +131,8 @@ def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb): box1, box2: List or tuple representing a 3D bounding box in the *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...] where cy is the geometric center y. - OR box1 and box2 are BoundingBox objects - get_corners_cb is a callback that is used to extract the parameters - from the box1 and box2 arguments + get_corners_cb: Callback function to extract AABB corners from a box + get_volume_cb: Callback function to calculate volume of a box Returns: float: The 3D IoU value. @@ -140,7 +140,7 @@ def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb): Doesn't consider yaw """ - ####### Simple Axis-Aligned Bounding Box (AABB) IoU ####### + ######### Simple Axis-Aligned Bounding Box (AABB) IoU# min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1) min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2) @@ -198,8 +198,24 @@ def calculate_ap(precision, recall): ap = np.sum((recall[indices] - recall[indices-1]) * precision[indices]) return ap -def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold): - """Evaluates a single detector's predictions against ground truth.""" +def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold, confidence_thresholds=None): + """Evaluates a single detector's predictions against ground truth. 
+ + Args: + gt_boxes_all_samples: Ground truth boxes dictionary by sample_id and class + pred_boxes_all_samples: Prediction boxes dictionary by sample_id and class + classes: List of classes to evaluate + iou_threshold: IoU threshold for matching predictions to ground truth + confidence_thresholds: List of confidence thresholds to use for evaluation + If None, uses [0.0] (all predictions) + + Returns: + Dictionary of results by class + """ + # Default to single threshold that includes all predictions + if confidence_thresholds is None: + confidence_thresholds = [0.0] + results_by_class = {} sample_ids = list(gt_boxes_all_samples.keys()) # Get fixed order of sample IDs @@ -207,6 +223,7 @@ def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou all_pred_boxes_cls = [] num_gt_cls = 0 pred_sample_indices = [] # Store index from sample_ids for each prediction + pred_scores = [] # Store confidence scores for each prediction # Collect all GTs and Preds for this class across samples for i, sample_id in enumerate(sample_ids): @@ -218,6 +235,11 @@ def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou for box in pred_boxes: all_pred_boxes_cls.append(box) pred_sample_indices.append(i) # Store the original sample index + # Extract score safely + if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): + pred_scores.append(box[IDX_SCORE]) + else: + pred_scores.append(0.0) # Default score if missing if not all_pred_boxes_cls: # Handle case with no predictions for this class results_by_class[cls] = { @@ -225,123 +247,185 @@ def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou 'recall': np.array([]), 'ap': 0.0, 'num_gt': num_gt_cls, - 'num_pred': 0 + 'num_pred': 0, + 'confusion_matrix': {}, # Empty confusion matrix + 'thresholds': confidence_thresholds } continue # Skip to next class - # Sort detections by confidence score (descending) - # Ensure scores exist and are numeric before sorting - scores = [] - valid_indices_for_sorting = [] - for idx, box in enumerate(all_pred_boxes_cls): - if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): - scores.append(-box[IDX_SCORE]) # Use negative score for descending sort with argsort - valid_indices_for_sorting.append(idx) - else: - warnings.warn(f"Class {cls}: Prediction missing score or invalid score type. 
Excluding from evaluation: {box}") - - if not valid_indices_for_sorting: # If filtering removed all boxes - results_by_class[cls] = {'precision': np.array([]),'recall': np.array([]),'ap': 0.0,'num_gt': num_gt_cls,'num_pred': 0} - continue - - # Filter lists based on valid scores - all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in valid_indices_for_sorting] - pred_sample_indices = [pred_sample_indices[i] for i in valid_indices_for_sorting] - # Scores list is already built correctly - - # Get the sorted order based on scores - sorted_indices = np.argsort(scores) # argsort sorts ascending on negative scores -> descending order of original scores - + # Sort all predictions by score (descending) + sorted_indices = np.argsort(pred_scores)[::-1] + # Reorder the lists based on sorted scores all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in sorted_indices] pred_sample_indices = [pred_sample_indices[i] for i in sorted_indices] - - - tp = np.zeros(len(all_pred_boxes_cls)) - fp = np.zeros(len(all_pred_boxes_cls)) - # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False - gt_matched = defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx - - # Match predictions - for det_idx, pred_box in enumerate(all_pred_boxes_cls): - sample_idx = pred_sample_indices[det_idx] # Get the original sample index (0 to num_samples-1) - sample_id = sample_ids[sample_idx] # Get the sample_id string using the index - gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) - - best_iou = -1.0 - best_gt_idx = -1 # Index relative to gt_boxes for this sample/class - - if not gt_boxes: # No GT for this class in this specific sample - fp[det_idx] = 1 + pred_scores = [pred_scores[i] for i in sorted_indices] + + # Store results for each confidence threshold + threshold_results = {} + + # For each confidence threshold + for threshold in confidence_thresholds: + # Filter predictions above threshold + threshold_indices = [i for i, score in enumerate(pred_scores) if score >= threshold] + if not threshold_indices: + # No predictions above this threshold + threshold_results[threshold] = { + 'precision': np.array([]), + 'recall': np.array([]), + 'ap': 0.0, + 'tp': np.array([]), + 'fp': np.array([]), + 'tn': 0, # TN doesn't apply directly to object detection (no explicit negatives) + 'fn': num_gt_cls, # All GT boxes are missed + 'confusion_matrix': { + 'TP': 0, + 'FP': 0, + 'FN': num_gt_cls, + 'TN': 0 # Included for completeness + } + } continue - - for gt_idx, gt_box in enumerate(gt_boxes): - # Explicitly check class match (belt-and-suspenders) - if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: - iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners) - if iou > best_iou: - best_iou = iou - best_gt_idx = gt_idx - # else: # Should not happen if inputs are correctly filtered by class - # pass - - - if best_iou >= iou_threshold: - # Check if this GT box was already matched *in this sample* - if not gt_matched[sample_idx].get(best_gt_idx, False): - tp[det_idx] = 1 - gt_matched[sample_idx][best_gt_idx] = True # Mark as matched for this sample + + # Get predictions above this threshold + filtered_pred_boxes = [all_pred_boxes_cls[i] for i in threshold_indices] + filtered_sample_indices = [pred_sample_indices[i] for i in threshold_indices] + filtered_scores = [pred_scores[i] for i in threshold_indices] + + tp = np.zeros(len(filtered_pred_boxes)) + fp = np.zeros(len(filtered_pred_boxes)) + # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False + gt_matched = 
defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx + + # Match predictions + for det_idx, pred_box in enumerate(filtered_pred_boxes): + sample_idx = filtered_sample_indices[det_idx] + sample_id = sample_ids[sample_idx] + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + + best_iou = -1.0 + best_gt_idx = -1 + + if not gt_boxes: # No GT for this class in this specific sample + fp[det_idx] = 1 + continue + + for gt_idx, gt_box in enumerate(gt_boxes): + # Explicitly check class match (belt-and-suspenders) + if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: + iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners, get_volume) + if iou > best_iou: + best_iou = iou + best_gt_idx = gt_idx + + if best_iou >= iou_threshold: + # Check if this GT box was already matched *in this sample* + if not gt_matched[sample_idx].get(best_gt_idx, False): + tp[det_idx] = 1 + gt_matched[sample_idx][best_gt_idx] = True # Mark as matched + else: + fp[det_idx] = 1 # Already matched by higher-scored prediction else: - fp[det_idx] = 1 # Matched a GT box already covered by a higher-scored prediction - else: - fp[det_idx] = 1 # Did not match any available GT box with sufficient IoU - - # Calculate precision/recall - fp_cumsum = np.cumsum(fp) - tp_cumsum = np.cumsum(tp) - - # Avoid division by zero if num_gt_cls is 0 - recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) - - # Avoid division by zero if no predictions were made or matched (tp + fp = 0) - denominator = tp_cumsum + fp_cumsum - precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) - - - ap = calculate_ap(precision, recall) - + fp[det_idx] = 1 # No match with sufficient IoU + + # Calculate confusion matrix metrics + total_tp = int(np.sum(tp)) + total_fp = int(np.sum(fp)) + + # Count false negatives (GT boxes that weren't matched) + total_fn = num_gt_cls - sum(len(matched) for matched in gt_matched.values()) + # True negatives don't apply well to object detection, but included for completeness + total_tn = 0 # Placeholder + + # Calculate precision/recall + fp_cumsum = np.cumsum(fp) + tp_cumsum = np.cumsum(tp) + + # Avoid division by zero if num_gt_cls is 0 + recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) + + # Avoid division by zero if no predictions were made or matched (tp + fp = 0) + denominator = tp_cumsum + fp_cumsum + precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) + + ap = calculate_ap(precision, recall) + + # Store results for this threshold + threshold_results[threshold] = { + 'precision': precision, + 'recall': recall, + 'ap': ap, + 'tp': tp, + 'fp': fp, + 'fn': total_fn, + 'tn': total_tn, + 'confusion_matrix': { + 'TP': total_tp, + 'FP': total_fp, + 'FN': total_fn, + 'TN': total_tn + }, + 'scores': filtered_scores + } + + # Find best AP score across thresholds + best_threshold = max(threshold_results.keys(), + key=lambda t: threshold_results[t]['ap'] if not np.isnan(threshold_results[t]['ap']) else 0.0) + best_result = threshold_results[best_threshold] + + # Store overall results for this class results_by_class[cls] = { - 'precision': precision, # Store as numpy arrays - 'recall': recall, - 'ap': ap, + 'precision': best_result['precision'], + 'recall': best_result['recall'], + 'ap': best_result['ap'], 'num_gt': num_gt_cls, - 'num_pred': len(all_pred_boxes_cls) # Number of predictions *with valid scores* + 
'num_pred': len(all_pred_boxes_cls), + 'best_threshold': best_threshold, + 'thresholds': confidence_thresholds, + 'threshold_results': threshold_results, + 'confusion_matrix': best_result['confusion_matrix'] } return results_by_class def plot_pr_curves(results_all_detectors, classes, output_dir): - """Plots Precision-Recall curves for each class.""" + """ + Plots Precision-Recall curves for each class. + + Creates: + 1. Overall PR curves comparing all detectors at their best threshold + 2. PR curves for each detector showing performance at different confidence thresholds + """ if not os.path.exists(output_dir): try: os.makedirs(output_dir) except OSError as e: print(f"[LOG] Error creating output directory {output_dir} for plots: {e}") return # Cannot save plots + + pr_curves_dir = os.path.join(output_dir, 'pr_curves') + if not os.path.exists(pr_curves_dir): + try: + os.makedirs(pr_curves_dir) + except OSError as e: + print(f"[LOG] Error creating PR curves directory {pr_curves_dir}: {e}") + return detector_names = list(results_all_detectors.keys()) + # Overall PR curves comparing detectors for cls in classes: plt.figure(figsize=(10, 7)) any_results_for_class = False # Track if any detector had results for this class for detector_name, results_by_class in results_all_detectors.items(): - if cls in results_by_class and results_by_class[cls]['num_pred'] > 0 : # Check if there were predictions + if cls in results_by_class and results_by_class[cls]['num_pred'] > 0: # Check if there were predictions res = results_by_class[cls] precision = res['precision'] recall = res['recall'] ap = res['ap'] + best_threshold = res.get('best_threshold', 0.0) # Ensure plotting works even if precision/recall are empty arrays if recall.size > 0 and precision.size > 0: @@ -349,51 +433,213 @@ def plot_pr_curves(results_all_detectors, classes, output_dir): plot_recall = np.concatenate(([0.], recall)) # Use precision[0] if available, else 0. plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) - plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', label=f'{detector_name} (AP={ap:.3f})') + plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', + label=f'{detector_name} (AP={ap:.3f}, t={best_threshold:.2f})') any_results_for_class = True else: # Handle case where num_pred > 0 but P/R arrays somehow ended up empty - plt.plot([0], [0], marker='s', markersize=5, linestyle='', label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') + plt.plot([0], [0], marker='s', markersize=5, linestyle='', + label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') any_results_for_class = True # Still mark as having results - elif cls in results_by_class: # Class exists in evaluation, but no predictions were made for it num_gt = results_by_class[cls]['num_gt'] if num_gt > 0: # Plot a marker indicating no predictions were made for this GT class - plt.plot([0], [0], marker='x', markersize=6, linestyle='', label=f'{detector_name} (No Pred, GT={num_gt})') - # else: # No GT and no predictions for this class, don't plot anything specific - # pass - # else: # Class not even in results dict for this detector (e.g., error during eval?) - # Could happen if detector had no files or all files failed parsing for this class - # Might indicate an issue, but avoid cluttering plot unless needed. 
- pass - + plt.plot([0], [0], marker='x', markersize=6, linestyle='', + label=f'{detector_name} (No Pred, GT={num_gt})') if any_results_for_class: plt.xlabel('Recall') plt.ylabel('Precision') plt.title(f'Precision-Recall Curve for Class: {cls}') - plt.legend(loc='lower left') + plt.legend(loc='lower left', fontsize='small') plt.grid(True) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) - plot_path = os.path.join(output_dir, f'pr_curve_{cls}.png') + plot_path = os.path.join(pr_curves_dir, f'pr_curve_{cls}_overview.png') try: plt.savefig(plot_path) - print(f"[LOG] Generated PR curve: {plot_path}") + print(f"[LOG] Generated PR curve overview: {plot_path}") except Exception as e: print(f"[LOG] Error saving PR curve plot for class '{cls}': {e}") finally: plt.close() # Close the figure regardless of save success else: # Check if there was any GT data for this class across all detectors - # Use .get() chain safely num_gt_total = sum(results_by_class.get(cls, {}).get('num_gt', 0) for results_by_class in results_all_detectors.values()) if num_gt_total > 0: print(f" Skipping PR plot for class '{cls}': No predictions found across detectors (GT={num_gt_total}).") else: print(f" Skipping PR plot for class '{cls}': No ground truth found.") plt.close() # Close the empty figure + + # Plots showing PR curves at different thresholds for each detector and class + for detector_name, results_by_class in results_all_detectors.items(): + detector_dir = os.path.join(pr_curves_dir, detector_name) + if not os.path.exists(detector_dir): + try: + os.makedirs(detector_dir) + except OSError as e: + print(f"[LOG] Error creating directory for {detector_name}: {e}") + continue + + for cls in classes: + if cls not in results_by_class or results_by_class[cls]['num_pred'] == 0: + continue + + res = results_by_class[cls] + threshold_results = res.get('threshold_results', {}) + + if not threshold_results: + continue + + plt.figure(figsize=(10, 7)) + + # Plot PR curve for each threshold + thresholds = sorted(threshold_results.keys()) + for threshold in thresholds: + threshold_res = threshold_results[threshold] + + # Skip thresholds with no predictions + if 'precision' not in threshold_res or len(threshold_res['precision']) == 0: + continue + + precision = threshold_res['precision'] + recall = threshold_res['recall'] + ap = threshold_res['ap'] + + # Get TP/FP counts for label + cm = threshold_res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + + # Mark best threshold with a different line style + is_best = threshold == res.get('best_threshold', 0.0) + linestyle = '-' if is_best else '--' + linewidth = 2 if is_best else 1 + + # Prepend points for better visualization + plot_recall = np.concatenate(([0.], recall)) + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + + plt.plot(plot_recall, plot_precision, marker='.', markersize=3, + linestyle=linestyle, linewidth=linewidth, + label=f'thresh={threshold:.2f} (AP={ap:.3f}, TP={tp}, FP={fp})') + + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'{detector_name}: PR Curves at Different Thresholds for {cls}') + plt.legend(loc='lower left', fontsize='small') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + + plot_path = os.path.join(detector_dir, f'pr_curve_{cls}_thresholds.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated threshold PR curves for {detector_name}/{cls}: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving threshold PR curves for 
{detector_name}/{cls}: {e}") + finally: + plt.close() + +def export_confusion_matrix(results_all_detectors, output_dir, classes): + """ + Exports confusion matrix metrics for all detectors and classes to CSV files. + + Args: + results_all_detectors: Dictionary of results by detector and class + output_dir: Directory to save results + classes: List of classes to evaluate + """ + confusion_dir = os.path.join(output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Create a summary file for all detectors + summary_path = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_path, 'w', newline='') as f: + writer = csv.writer(f) + # Write header + writer.writerow(['Detector', 'Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + # Write data for each detector, class, and threshold + for detector_name, results_by_class in results_all_detectors.items(): + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + best_threshold = cls_results.get('best_threshold', 0.0) + threshold_results = cls_results.get('threshold_results', {}) + + for threshold, result in threshold_results.items(): + # Skip if no predictions at this threshold + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + tp = cm['TP'] + fp = cm['FP'] + fn = cm['FN'] + tn = cm['TN'] + + # Calculate precision and recall + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + # Write row + is_best = '* ' if threshold == best_threshold else '' + writer.writerow([ + detector_name, + cls, + f"{is_best}{threshold:.3f}", + tp, + fp, + fn, + tn, + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Confusion matrix metrics saved to: {summary_path}") + + # Create detailed files for each detector with per-threshold metrics + for detector_name, results_by_class in results_all_detectors.items(): + detector_file = os.path.join(confusion_dir, f'{detector_name}_detailed.csv') + with open(detector_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + threshold_results = cls_results.get('threshold_results', {}) + + # Sort thresholds for better readability + thresholds = sorted(threshold_results.keys()) + + for threshold in thresholds: + result = threshold_results[threshold] + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + precision = cm['TP'] / (cm['TP'] + cm['FP']) if (cm['TP'] + cm['FP']) > 0 else 0 + recall = cm['TP'] / (cm['TP'] + cm['FN']) if (cm['TP'] + cm['FN']) > 0 else 0 + + writer.writerow([ + cls, + f"{threshold:.3f}", + cm['TP'], + cm['FP'], + cm['FN'], + cm['TN'], + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Detailed metrics for {detector_name} saved to: {detector_file}") def main(): parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') @@ -404,10 +650,12 @@ def main(): parser.add_argument('--iou_threshold', type=float, default=0.5, help='3D IoU threshold for matching (default: 0.5). 
KITTI examples: 0.5 (Car Easy/Mod), 0.7 (Car Hard), 0.5 (Ped/Cyc Easy/Mod), 0.5 (Ped/Cyc Hard).') parser.add_argument('--classes', type=str, nargs='*', default=['Car', 'Pedestrian', 'Cyclist'], help='List of classes to evaluate (default: KITTI common classes). Case sensitive.') parser.add_argument('--file_extension', type=str, default='.txt', help='Extension of the label files (default: .txt).') + parser.add_argument('--confidence_thresholds', type=float, nargs='*', default=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + help='Confidence thresholds for evaluation (default: 0.0-0.9 in 0.1 increments).') args = parser.parse_args() - ###### Argument Validation and Setup ###### + ######## Argument Validation and Setup if not os.path.isdir(args.gt_dir): print(f"[LOG] Error: Ground truth directory not found: {args.gt_dir}") return @@ -447,9 +695,10 @@ def main(): print(f"IoU Threshold: {args.iou_threshold}") print(f"Classes: {args.classes}") print(f"File Extension: {args.file_extension}") + print(f"Confidence Thresholds: {args.confidence_thresholds}") print("=====================\n") - ###### Load Data ###### + ######## Load Data print("[LOG] Loading data...") try: gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(args.file_extension)]) @@ -492,7 +741,7 @@ def main(): traceback.print_exc() return - ###### Evaluation ###### + ######## Evaluation print("[LOG] Evaluating detectors...") results_all_detectors = {} # {detector_name: {class: {'ap':, 'p':, 'r':, 'gt':, 'pred':}}} try: @@ -500,7 +749,7 @@ def main(): print(f" Evaluating: {detector_name}") # Check if prediction data was actually loaded for this detector if detector_name not in pred_boxes_all_detectors: - print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") + print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") results_all_detectors[detector_name] = {} # Store empty results continue @@ -516,7 +765,8 @@ def main(): gt_boxes_all_samples, pred_boxes_all_samples, args.classes, # Pass the user-specified classes - args.iou_threshold + args.iou_threshold, + args.confidence_thresholds ) results_all_detectors[detector_name] = results_by_class print("[LOG] Evaluation complete.\n") @@ -526,7 +776,7 @@ def main(): return # Stop execution on evaluation error - ###### Report Results & Save ###### + ######## Report Results & Save print("[LOG] Results") results_file_path = os.path.join(args.output_dir, 'evaluation_metrics.txt') @@ -534,6 +784,7 @@ def main(): with open(results_file_path, 'w') as f: f.write(f"Evaluation Results (IoU Threshold: {args.iou_threshold})\n") f.write(f"Evaluated Classes: {', '.join(args.classes)}\n") + f.write(f"Confidence Thresholds: {args.confidence_thresholds}\n") f.write("="*60 + "\n") overall_mAPs = {} # Store mAP for each detector {detector_name: mAP_value} @@ -549,8 +800,8 @@ def main(): results_by_class = results_all_detectors[detector_name] print(f"\nDetector: {detector_name}") f.write(f"\nDetector: {detector_name}\n") - f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10}\n") - f.write("-" * 55 + "\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") detector_aps = [] # AP values for classes with GT > 0 for this detector evaluated_classes_with_gt = [] # Class names with GT > 0 for this detector @@ -566,8 +817,17 @@ def main(): ap = res['ap'] num_gt = res['num_gt'] # Should match num_gt_total_for_class if 
evaluated correctly num_pred = res['num_pred'] # Number of valid predictions for this class - print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10}") - f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10}\n") + best_threshold = res.get('best_threshold', 0.0) + + # Get confusion matrix from best threshold + cm = res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}") + f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}\n") + # Include in mAP calculation only if there were GT boxes for this class if num_gt > 0: detector_aps.append(ap) @@ -576,8 +836,8 @@ def main(): else: # No results entry for this class, implies 0 valid predictions processed. # Report AP as 0.0000 if GT existed, otherwise N/A. - print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10}") - f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10}\n") + print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}") + f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}\n") # If GT existed, this class contributes 0 to the mAP average. if num_gt_total_for_class > 0: detector_aps.append(0.0) @@ -589,19 +849,40 @@ def main(): overall_mAPs[detector_name] = mAP # Report which classes contributed to the mAP mAP_info = f"(Classes w/ GT: {', '.join(evaluated_classes_with_gt)})" if evaluated_classes_with_gt else "" - print("-" * 55) + print("-" * 85) print(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}") - f.write("-" * 55 + "\n") + f.write("-" * 85 + "\n") f.write(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}\n") + + # Add confusion matrix section for this detector + f.write("\nConfusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + + for cls in evaluated_classes_with_gt: + if cls in results_by_class: + res = results_by_class[cls] + best_thresh = res.get('best_threshold', 0.0) + cm = res.get('confusion_matrix', {}) + + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + f.write(f"{cls:<15} | {best_thresh:<10.3f} | {tp:<5} | {fp:<5} | {fn:<5} | {precision:<10.4f} | {recall:<10.4f}\n") + else: overall_mAPs[detector_name] = np.nan # Indicate mAP couldn't be computed - print("-" * 55) + print("-" * 85) print(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}") - f.write("-" * 55 + "\n") + f.write("-" * 85 + "\n") f.write(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}\n") - ###### Overall Comparison Table ###### + ######## Overall Comparison Table # Check if there are results to compare if len(final_detector_names) > 0 and results_all_detectors: print("\n[LOG] Overall Class AP Comparison") @@ -639,7 +920,7 @@ def main(): print(line) f.write(line + "\n") - ###### Overall mAP Comparison ###### + ######## Overall mAP Comparison 
print("-" * len(header)) f.write("-" * len(header) + "\n") map_str = "" @@ -653,16 +934,22 @@ def main(): print("\n[LOG] Skipping overall comparison table - no results available.") f.write("\n[LOG] Skipping overall comparison table - no results available.\n") - print(f"\nMetrics saved to: {results_file_path}") except Exception as e: print(f"[LOG] Error during results reporting/saving: {e}") traceback.print_exc() - # Continue to plotting if possible - # return # Or stop here + # Plot + # return # Or stop + + ########## Export Confusion Matrix CSV + try: + export_confusion_matrix(results_all_detectors, args.output_dir, args.classes) + except Exception as e: + print(f"[LOG] Error exporting confusion matrix: {e}") + traceback.print_exc() - ###### Plotting ###### + ########## Plotting print("\n[LOG] Generating PR curves...") try: # Check if results_all_detectors has data before plotting @@ -686,4 +973,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() From 7542f05e4acdc561331a3bf8c69a22e2c8bb0db8 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Wed, 14 May 2025 22:41:21 -0500 Subject: [PATCH 55/55] Updated Perception README file and added BEVFusion set up instructions. --- .../detection/bevfusion_instructions.md | 146 ++++++++++++++++++ GEMstack/onboard/perception/README.md | 70 ++++++--- 2 files changed, 196 insertions(+), 20 deletions(-) create mode 100644 GEMstack/knowledge/detection/bevfusion_instructions.md diff --git a/GEMstack/knowledge/detection/bevfusion_instructions.md b/GEMstack/knowledge/detection/bevfusion_instructions.md new file mode 100644 index 000000000..b29dca972 --- /dev/null +++ b/GEMstack/knowledge/detection/bevfusion_instructions.md @@ -0,0 +1,146 @@ +# BEVFusion Set Up Instructions +These instructions were tested on T4 g4dn.xlarge AWS instances with Arara Ubuntu 20.04 DCV images. + +## Set Up Instructions for Cuda 11.3 +### Set Up your Nvida Driver +``` +sudo apt-get update +sudo apt-get install -y ubuntu-drivers-common +ubuntu-drivers devices +sudo apt-get install -y nvidia-driver-565 +sudo reboot +``` + +### Check to make sure that your nvidia driver was set up correctly: +``` +nvidia-smi +``` + +### Install Cuda 11.3 +``` +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin +sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 +wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-ubuntu2004-11-3-local_11.3.0-465.19.01-1_amd64.deb +sudo dpkg -i cuda-repo-ubuntu2004-11-3-local_11.3.0-465.19.01-1_amd64.deb # This never copy pastes right. 
Just manually type
+sudo apt-key add /var/cuda-repo-ubuntu2004-11-3-local/7fa2af80.pub
+sudo apt-get update
+sudo apt-get -y install cuda-11-3
+```
+
+### Manually modify your bashrc file to include Cuda 11.3
+```
+sudo nano ~/.bashrc
+```
+
+Add the next 2 lines to the bottom of the file:
+```
+export PATH=/usr/local/cuda-11.3/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH
+```
+
+Ensure you source your bashrc file:
+```
+source ~/.bashrc
+nvidia-smi
+```
+
+### Set Up Miniconda
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash ~/Miniconda3-latest-Linux-x86_64.sh
+source ~/.bashrc
+```
+
+### Set Up mmdetection3d:
+```
+conda create --name openmmlab python=3.8 -y
+conda activate openmmlab
+conda install pytorch=1.10.2 torchvision=0.11.3 cudatoolkit=11.3 -c pytorch
+pip install -U openmim
+mim install mmengine
+mim install 'mmcv>=2.0.0rc4, <2.2.0'
+mim install 'mmdet>=3.0.0,<3.3.0'
+pip install cumm-cu113
+pip install spconv-cu113
+git clone https://github.com/open-mmlab/mmdetection3d.git -b dev-1.x
+cd mmdetection3d
+pip install -v -e .
+python projects/BEVFusion/setup.py develop
+```
+
+### Run this afterwards to verify BEVFusion has been set up correctly:
+```
+python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ~/Downloads/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-5239b1af.pth --cam-type all --score-thr 0.2 --show
+```
+
+## Set Up Instructions for Cuda 11.1
+### Set Up your Nvidia Driver
+```
+sudo apt-get update
+sudo apt-get install -y ubuntu-drivers-common
+ubuntu-drivers devices
+sudo apt-get install -y nvidia-driver-565
+sudo reboot
+```
+
+### Check to make sure that your nvidia driver was set up correctly:
+```
+nvidia-smi
+```
+
+### Install Cuda 11.1
+```
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda-repo-ubuntu2004-11-1-local_11.1.0-455.23.05-1_amd64.deb
+sudo dpkg -i cuda-repo-ubuntu2004-11-1-local_11.1.0-455.23.05-1_amd64.deb
+sudo apt-key add /var/cuda-repo-ubuntu2004-11-1-local/7fa2af80.pub
+sudo apt-get update
+sudo apt-get -y install cuda-11-1
+```
+
+### Manually modify your bashrc file to include Cuda 11.1
+```
+sudo nano ~/.bashrc
+```
+
+Add the next 2 lines to the bottom of the file:
+```
+export PATH=/usr/local/cuda-11.1/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH
+```
+
+Ensure you source your bashrc file:
+```
+source ~/.bashrc
+nvidia-smi
+```
+
+### Set Up Miniconda
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash ~/Miniconda3-latest-Linux-x86_64.sh
+source ~/.bashrc
+```
+
+### Set Up mmdetection3d:
+```
+conda create --name openmmlab python=3.8 -y
+conda activate openmmlab
+pip install torch==1.10.0+cu111 torchvision==0.11.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+pip install -U openmim
+mim install mmengine
+mim install 'mmcv>=2.0.0rc4, <2.2.0'
+mim install 'mmdet>=3.0.0,<3.3.0'
+pip install cumm-cu111
+pip install spconv-cu111
+git clone https://github.com/open-mmlab/mmdetection3d.git -b dev-1.x
+cd 
mmdetection3d +pip install -v -e . +python projects/BEVFusion/setup.py develop +``` + +### Run this afterwards to verify BEVFusion has been set up correctly: +``` +python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ~/Downloads/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-5239b1af.pth --cam-type all --score-thr 0.2 --show +``` \ No newline at end of file diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md index 58c914fad..33876d899 100644 --- a/GEMstack/onboard/perception/README.md +++ b/GEMstack/onboard/perception/README.md @@ -2,7 +2,7 @@ This folder contains code that is used to detect objects in 3D space and subsequently notify other GEMstack components of the detected objects. It is split up into 3 main areas: Pedestrian detection code created at the beginning-middle of the course, Cone Detection code that is used to detect cones (optionally with orientation), and Sensor Fusion code which fuses YOLO + painted lidar data and PointPillars 3D bounding boxes to detect pedestrians. ## Cone Detection -Description goes here. +A YOLO model was trained to detect the orientations of traffic cones in 3D space to support the Vertical Groups. ### Relevant Files - cone_detection.py @@ -31,8 +31,10 @@ To improve the quality of the detected pedestrians, we decided to fuse detection ### Local Installation Steps for PointPillars Docker Container #### READ BEFOREHAND: - Before perfoming installation steps, please make sure you source ALL terminal windows (except for docker terminal window). -$ source /opt/ros/noetic/setup.bash -$ source ~/catkin_ws/devel/setup.bash +``` +source /opt/ros/noetic/setup.bash +source ~/catkin_ws/devel/setup.bash +``` - These instructions were written with the assumption that you are running them inside of the outermost GEMstack folder. - If you have set up issues please read the "Set Up Issues Known Fixes" section at the bottom. @@ -40,44 +42,72 @@ $ source ~/catkin_ws/devel/setup.bash 1. Install Docker 2. Install Docker Compose 3. A bash script was created to handle docker permissions issues and make the set up process simpler: -$ cd GEMstack/onboard/perception -$ bash build_point_pillars.sh +``` +cd GEMstack/onboard/perception +bash build_point_pillars.sh +``` 4. Start the container (use sudo if you run into permissions issues) -$ docker compose -f setup/docker-compose.yaml up +``` +docker compose -f setup/docker-compose.yaml up +``` 5. Run roscore on local machine (make sure you source first) -$ roscore +``` +roscore +``` 6. Start up YOLO node (make sure you source first): -$ python3 GEMstack/onboard/perception/yolo_node.py +``` +python3 GEMstack/onboard/perception/yolo_node.py +``` 7. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first): -$ python3 main.py --variant=detector_only launch/combined_detection.yaml +``` +python3 main.py --variant=detector_only launch/combined_detection.yaml +``` 8. Run a rosbag on a loop (make sure you source first): -$ rosbag play -l yourRosbagNameGoesHere.bag +``` +rosbag play -l yourRosbagNameGoesHere.bag +``` ### Vehicle Installation Steps for PointPillars Docker Container Perform the same setup steps as the above section with the below exceptions: 1. 
Ensure you source instead with the following command: -$ source ~/demo_ws/devel/setup.bash +``` +source ~/demo_ws/devel/setup.bash +``` 2. Initialize the sensors: -$ roslaunch basic_launch sensor_init.launch +``` +roslaunch basic_launch sensor_init.launch +``` 3. Initialize GNSS (if you need it) -$ roslaunch basic_launch visualization.launch +``` +roslaunch basic_launch visualization.launch +``` 4. Do not run a rosbag in Step 8 above (it's not needed since you'll be getting live data from the vehicle) #### Known Fixes for Set Up Issues 1. If you get a shape error when creating the "results_normal" variable in yolo_node.py, please downgrade your Ultralytics version to 8.1.5 (this is the version used on the car at the time of writing this): -$ pip install 'ultralytics==8.1.5' +``` +pip install 'ultralytics==8.1.5' +``` 2. If you run into communication issues with ROS, please make sure you have sourced EVERY terminal window (except for docker window there's no need to): -$ source /opt/ros/noetic/setup.bash -$ source ~/catkin_ws/devel/setup.bash +``` +source /opt/ros/noetic/setup.bash +source ~/catkin_ws/devel/setup.bash +``` ### Visualization Steps: Please make sure you source each new terminal window after creating it (local source commands are below): -$ source /opt/ros/noetic/setup.bash -$ source ~/catkin_ws/devel/setup.bash +``` +source /opt/ros/noetic/setup.bash +source ~/catkin_ws/devel/setup.bash +``` 1. Start rviz: -$ rviz +``` +rviz +``` 2. Publish a static transform from the map to visualize the published bounding box data: -$ rosrun tf2_ros static_transform_publisher 0 0 0 0 0 0 map currentVehicleFrame +``` +rosrun tf2_ros static_transform_publisher 0 0 0 0 0 0 map currentVehicleFrame +``` 3. In Rviz, click "add" in the bottom left corner. In "By display type", under "jsk_rviz_plugins" select BoundingBoxArray. 4. Expand BoundingBoxArray on the left. Under it you will see "Topic" with a blank space to the right of it. Click the blank space (it's a hidden drop down box) and select the BoundingBoxArray topic to visualize \ No newline at end of file
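+
+As an optional sanity check before adding the display, you can first confirm that a BoundingBoxArray topic is actually being published. The exact topic name depends on your launch configuration, so treat the topic name below as a placeholder and substitute whatever appears in the topic list:
+```
+rostopic list | grep -i bound
+rostopic echo -n 1 /your_bounding_box_array_topic
+```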