From 8e8ac77b1537ab187a111606784569bdc33cf034 Mon Sep 17 00:00:00 2001 From: Lukas Dumasius Date: Thu, 15 May 2025 16:16:09 -0500 Subject: [PATCH 1/8] update just GEMstack/onboard/perception for merge --- GEMstack/onboard/perception/README.md | 113 ++ .../onboard/perception/agent_detection.py | 21 - .../onboard/perception/build_point_pillars.sh | 49 + .../onboard/perception/combined_detection.py | 511 +++++++++ .../perception/combined_detection_utils.py | 36 + GEMstack/onboard/perception/cone_detection.py | 16 +- .../perception/pedestrian_detection.py | 399 +++++++ .../onboard/perception/perception_utils.py | 57 +- .../perception/perception_utils_gem.py | 35 + .../onboard/perception/person_detector.py | 48 + .../onboard/perception/point_pillars_node.py | 221 ++++ .../sensorFusion/eval_3d_model_performance.py | 976 ++++++++++++++++++ .../pointpillars_ros/CmakeLists.txt | 26 + .../launch/pointpillars_test.launch | 22 + .../sensorFusion/pointpillars_ros/package.xml | 21 + .../scripts/pointpillars_ros_node.py | 351 +++++++ .../sensorFusion/rosbag_processor.py | 103 ++ .../runner_eval_3d_model_performance.py | 321 ++++++ .../perception/setup/Dockerfile.cuda111 | 97 ++ .../perception/setup/docker-compose.yaml | 45 + .../onboard/perception/state_estimation.py | 6 +- GEMstack/onboard/perception/yolo_node.py | 273 +++++ 22 files changed, 3684 insertions(+), 63 deletions(-) create mode 100644 GEMstack/onboard/perception/README.md create mode 100644 GEMstack/onboard/perception/build_point_pillars.sh create mode 100644 GEMstack/onboard/perception/combined_detection.py create mode 100644 GEMstack/onboard/perception/combined_detection_utils.py create mode 100644 GEMstack/onboard/perception/pedestrian_detection.py create mode 100644 GEMstack/onboard/perception/perception_utils_gem.py create mode 100644 GEMstack/onboard/perception/person_detector.py create mode 100644 GEMstack/onboard/perception/point_pillars_node.py create mode 100644 GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml create mode 100644 GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py create mode 100644 GEMstack/onboard/perception/sensorFusion/rosbag_processor.py create mode 100644 GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py create mode 100644 GEMstack/onboard/perception/setup/Dockerfile.cuda111 create mode 100644 GEMstack/onboard/perception/setup/docker-compose.yaml create mode 100644 GEMstack/onboard/perception/yolo_node.py diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md new file mode 100644 index 000000000..33876d899 --- /dev/null +++ b/GEMstack/onboard/perception/README.md @@ -0,0 +1,113 @@ +# Perception Team +This folder contains code that is used to detect objects in 3D space and subsequently notify other GEMstack components of the detected objects. It is split up into 3 main areas: Pedestrian detection code created at the beginning-middle of the course, Cone Detection code that is used to detect cones (optionally with orientation), and Sensor Fusion code which fuses YOLO + painted lidar data and PointPillars 3D bounding boxes to detect pedestrians. 
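+
+As a minimal illustration of the matching idea behind the fusion (plain Python, not the exact GEMstack API; the production logic lives in `calculate_bev_iou` and `_fuse_bounding_boxes` in combined_detection.py), two detections are only merged when their boxes overlap sufficiently:
+```
+# Sketch: axis-aligned Bird's Eye View IoU used to gate a YOLO/PointPillars match
+def bev_iou(a, b):
+    # a, b: (x, y, length, width) boxes in the ground plane
+    ix = max(0.0, min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0]))
+    iy = max(0.0, min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1]))
+    inter = ix * iy
+    union = a[2] * a[3] + b[2] * b[3] - inter
+    return inter / union if union > 0 else 0.0
+
+print(bev_iou((0, 0, 1, 2), (0.5, 0, 1, 2)))  # 1.0 / 3.0 = 0.333...
+```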
+
+## Cone Detection
+A YOLO model was trained to detect the orientations of traffic cones in 3D space to support the Vertical Groups.
+
+### Relevant Files
+- cone_detection.py
+- perception_utils.py
+
+
+## Sensor Fusion
+To improve the quality of the detected pedestrians, we fuse detections from multiple modalities to take advantage of the strengths each sensor (camera and lidar in our case) provides. We accomplish this by fusing the 3D pedestrian bounding boxes produced by YOLO (a camera-based detector) combined with painted lidar data, and by PointPillars (a lidar-only detector).
+
+### Relevant Files
+#### Setup files to create the PointPillars CUDA 11.1.1 Docker container:
+- build_point_pillars.sh
+- setup/docker-compose.yaml
+- setup/Dockerfile.cuda111
+
+#### Code used to detect pedestrians:
+- combined_detection.py
+- point_pillars_node.py
+- yolo_node.py
+
+#### Code used to analyze the results of detections and extract data from rosbags for further analysis:
+- sensorFusion/eval_3d_model_performance.py
+- sensorFusion/rosbag_processor.py
+- sensorFusion/runner_eval_3d_model_performance.py
+
+### Local Installation Steps for PointPillars Docker Container
+#### READ BEFOREHAND:
+- Before performing the installation steps, please make sure you source ALL terminal windows (except for the Docker terminal window).
+```
+source /opt/ros/noetic/setup.bash
+source ~/catkin_ws/devel/setup.bash
+```
+- These instructions assume that you are running them inside the outermost GEMstack folder.
+- If you have setup issues, please read the "Known Fixes for Set Up Issues" section at the bottom.
+
+#### Steps:
+1. Install Docker
+2. Install Docker Compose
+3. Run the bash script that handles Docker permission issues and simplifies the setup process:
+```
+cd GEMstack/onboard/perception
+bash build_point_pillars.sh
+```
+4. Start the container (use sudo if you run into permission issues):
+```
+docker compose -f setup/docker-compose.yaml up
+```
+5. Run roscore on the local machine (make sure you source first):
+```
+roscore
+```
+6. Start up the YOLO node (make sure you source first):
+```
+python3 GEMstack/onboard/perception/yolo_node.py
+```
+7. Run the launch YAML to start up the CombinedDetector3D GEMstack component (make sure you source first):
+```
+python3 main.py --variant=detector_only launch/combined_detection.yaml
+```
+8. Run a rosbag in a loop (make sure you source first):
+```
+rosbag play -l yourRosbagNameGoesHere.bag
+```
+
+### Vehicle Installation Steps for PointPillars Docker Container
+Perform the same setup steps as in the section above, with the following exceptions:
+1. Source with the following command instead:
+```
+source ~/demo_ws/devel/setup.bash
+```
+2. Initialize the sensors:
+```
+roslaunch basic_launch sensor_init.launch
+```
+3. Initialize GNSS (if you need it):
+```
+roslaunch basic_launch visualization.launch
+```
+4. Do not run a rosbag in Step 8 above (it's not needed since you'll be getting live data from the vehicle).
+
+#### Known Fixes for Set Up Issues
+1. If you get a shape error when creating the "results_normal" variable in yolo_node.py, please downgrade your Ultralytics version to 8.1.5 (this is the version used on the car at the time of writing):
+```
+pip install 'ultralytics==8.1.5'
+```
+2. 
If you run into communication issues with ROS, please make sure you have sourced EVERY terminal window (except for the Docker terminal window; there's no need to source it):
+```
+source /opt/ros/noetic/setup.bash
+source ~/catkin_ws/devel/setup.bash
+```
+
+### Visualization Steps:
+Please make sure you source each new terminal window after creating it (the local source commands are below):
+```
+source /opt/ros/noetic/setup.bash
+source ~/catkin_ws/devel/setup.bash
+```
+
+1. Start rviz:
+```
+rviz
+```
+2. Publish a static transform from the map to visualize the published bounding box data:
+```
+rosrun tf2_ros static_transform_publisher 0 0 0 0 0 0 map currentVehicleFrame
+```
+3. In rviz, click "Add" in the bottom left corner. In "By display type", under "jsk_rviz_plugins", select BoundingBoxArray.
+4. Expand BoundingBoxArray on the left. Under it you will see "Topic" with a blank space to the right of it. Click the blank space (it's a hidden drop-down box) and select the BoundingBoxArray topic to visualize.
\ No newline at end of file
diff --git a/GEMstack/onboard/perception/agent_detection.py b/GEMstack/onboard/perception/agent_detection.py
index c80442771..5d600f792 100644
--- a/GEMstack/onboard/perception/agent_detection.py
+++ b/GEMstack/onboard/perception/agent_detection.py
@@ -31,24 +31,3 @@ def agent_callback(self, name : str, agent : AgentState):
     def update(self) -> Dict[str,AgentState]:
         with self.lock:
             return copy.deepcopy(self.agents)
-
-
-class GazeboAgentDetector(OmniscientAgentDetector):
-    """Obtains agent detections from the Gazebo simulator using model_states topic"""
-    def __init__(self, vehicle_interface : GEMInterface, tracked_model_prefixes=None):
-        super().__init__(vehicle_interface)
-
-        # If specific model prefixes are provided, configure the interface to track them
-        if tracked_model_prefixes is not None:
-            # Check if our interface has the tracked_model_prefixes attribute (is a GazeboInterface)
-            if hasattr(vehicle_interface, 'tracked_model_prefixes'):
-                vehicle_interface.tracked_model_prefixes = tracked_model_prefixes
-                print(f"Configured GazeboAgentDetector to track models with prefixes: {tracked_model_prefixes}")
-            else:
-                print("Warning: vehicle_interface doesn't support tracked_model_prefixes configuration")
-
-    def initialize(self):
-        # Use the same agent_detector sensor as OmniscientAgentDetector
-        # The GazeboInterface implements this with model_states subscription
-        super().initialize()
-        print("GazeboAgentDetector initialized and subscribed to model_states")
diff --git a/GEMstack/onboard/perception/build_point_pillars.sh b/GEMstack/onboard/perception/build_point_pillars.sh
new file mode 100644
index 000000000..829243f2f
--- /dev/null
+++ b/GEMstack/onboard/perception/build_point_pillars.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Check that point_pillars_node.py, the helper functions file, and the model weights exist
+if [ ! -f "point_pillars_node.py" ]; then
+    echo "ERROR: point_pillars_node.py not found in the current directory!"
+    echo "Please place your point_pillars_node.py file in the same directory as this script."
+    exit 1
+fi
+
+if [ ! -f "combined_detection_utils.py" ]; then
+    echo "ERROR: combined_detection_utils.py not found in the current directory!"
+    echo "Please place your combined_detection_utils.py file in the same directory as this script."
+    exit 1
+fi
+
+if [ ! -f "epoch_160.pth" ]; then
+    echo "WARNING: epoch_160.pth model weights not found in the current directory!"
+    echo "Please place your model weights file in the same directory as this script." 
+ echo "Continue anyway? (y/n)" + read -p ">" choice + if [ "$choice" != "y" ] && [ "$choice" != "Y" ]; then + exit 1 + fi +fi + +echo "Building Point Pillars Docker container..." +export DOCKERFILE=setup/Dockerfile.cuda111 + +# Using sudo to handle permissions +MY_UID=$(id -u) +MY_GID=$(id -g) + +# Attempt to use docker-compose directly, then with sudo if needed (if you uncomment it) +if ! docker compose -f setup/docker-compose.yaml build; then + echo "Uncomment these lines if you wish to use sudo to build container as backup" + # echo "Using sudo to build the container..." + # sudo -E docker compose -f setup/docker-compose.yaml build +fi + +# Notify user of how to run the container +echo "Build complete. To start the container, run:" +echo "docker compose -f setup/docker-compose.yaml up" +echo "" +echo "Or with sudo if you have permission issues:" +echo "sudo docker compose -f setup/docker-compose.yaml up" +echo "" +echo "To run in detached mode (background):" +echo "docker compose -f setup/docker-compose.yaml up -d" +echo "Or: sudo docker compose -f setup/docker-compose.yaml up -d" diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py new file mode 100644 index 000000000..59ab5edae --- /dev/null +++ b/GEMstack/onboard/perception/combined_detection.py @@ -0,0 +1,511 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from .perception_utils import pose_to_matrix +from .perception_utils_gem import * +from typing import Dict, List, Optional, Tuple +import rospy +from message_filters import Subscriber, ApproximateTimeSynchronizer +import time +import os +import yaml +import numpy as np +from scipy.spatial.transform import Rotation as R +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Quaternion + +from .sensorFusion.eval_3d_model_performance import calculate_3d_iou +import copy + + +def average_yaw(yaw1, yaw2): + v1 = np.array([np.cos(yaw1), np.sin(yaw1)]) + v2 = np.array([np.cos(yaw2), np.sin(yaw2)]) + v_avg = (v1 + v2) / 2.0 + return np.arctan2(v_avg[1], v_avg[0]) + + +def quaternion_to_yaw(quaternion_arr): + return R.from_quat(quaternion_arr).as_euler('zyx', degrees=False)[0] + + +def avg_orientations(orientation1, orientation2): + """Average two quaternion orientations by converting to yaw and back.""" + # This function assumes both quaternions are 2D planar rotations around the z axis + quat1 = [orientation1.x, orientation1.y, orientation1.z, orientation1.w] + quat2 = [orientation2.x, orientation2.y, orientation2.z, orientation2.w] + + yaw1 = quaternion_to_yaw(quaternion_arr=quat1) + yaw2 = quaternion_to_yaw(quaternion_arr=quat2) + + # Compute the average yaw and then convert back into quaternions + # (averaging quaternions directly causes problems) + avg_yaw = average_yaw(yaw1=yaw1, yaw2=yaw2) + avg_quat = R.from_euler('z', avg_yaw).as_quat() + + orientation = Quaternion() + orientation.x = float(avg_quat[0]) + orientation.y = float(avg_quat[1]) + orientation.z = float(avg_quat[2]) + orientation.w = float(avg_quat[3]) + + return orientation + + +def merge_boxes(box1: BoundingBox, box2: BoundingBox, mode: str = "Average") -> BoundingBox: + """ + Merge two bounding boxes using the specified mode. + + Args: + box1: First bounding box + box2: Second bounding box + mode: Merging strategy to use. 
Options include: + - "Average": Simple average of position, dimensions, and orientation (default) + - "Score": Weight average by confidence scores (box.value) + - "Max": Use the entire box with the highest confidence score + + Notes: + - The 'value' field in BoundingBox is used for the confidence score + - The label from the box with higher confidence is used when using Score mode + - In Average mode, the label from the first box is used + - Max mode returns a copy of the box with the highest confidence score + + Returns: + A merged bounding box + """ + + merged_box = BoundingBox() + merged_box.header = box1.header # Use header from one input + + # Get confidence scores + score1 = box1.value if hasattr(box1, 'value') else 0.0 + score2 = box2.value if hasattr(box2, 'value') else 0.0 + + if mode == "Max": + # Use the box with the higher confidence score entirely + if score1 >= score2: + return copy.deepcopy(box1) + else: + return copy.deepcopy(box2) + + elif mode == "Score": + # If both scores are 0, fall back to average + if score1 == 0.0 and score2 == 0.0: + weight1, weight2 = 0.5, 0.5 + else: + # Calculate normalized weights based on scores + total_score = score1 + score2 + weight1 = score1 / total_score + weight2 = score2 / total_score + + # Weighted average of position + merged_box.pose.position.x = (weight1 * box1.pose.position.x) + (weight2 * box2.pose.position.x) + merged_box.pose.position.y = (weight1 * box1.pose.position.y) + (weight2 * box2.pose.position.y) + merged_box.pose.position.z = (weight1 * box1.pose.position.z) + (weight2 * box2.pose.position.z) + + # Weighted average of dimensions + merged_box.dimensions.x = (weight1 * box1.dimensions.x) + (weight2 * box2.dimensions.x) + merged_box.dimensions.y = (weight1 * box1.dimensions.y) + (weight2 * box2.dimensions.y) + merged_box.dimensions.z = (weight1 * box1.dimensions.z) + (weight2 * box2.dimensions.z) + + # For orientation, we still use the average_orientations function + # More advanced: implement weighted quaternion averaging + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + + # For label, use the one from the higher-score box + merged_box.label = box1.label if score1 >= score2 else box2.label + + elif mode == "BEV": + # Merge the bounding boxes from Bird's Eye View (BEV) + # Average the x and y centers and dimensions + # Average the yaw orientation + # Use YOLO bounding box (box1) for z dimension and z center + merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 + merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 + merged_box.pose.position.z = copy.deepcopy(box1.pose.position.z) + + # Avg orientations (quaternions) + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + + merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0 + merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0 + merged_box.dimensions.z = copy.deepcopy(box1.dimensions.z) + + merged_box.label = box1.label # Label from first box + + else: # Default to "Average" mode + # Original averaging logic + merged_box.pose.position.x = (box1.pose.position.x + box2.pose.position.x) / 2.0 + merged_box.pose.position.y = (box1.pose.position.y + box2.pose.position.y) / 2.0 + merged_box.pose.position.z = (box1.pose.position.z + box2.pose.position.z) / 2.0 + + # Avg orientations (quaternions) + merged_box.pose.orientation = avg_orientations(box1.pose.orientation, box2.pose.orientation) + + 
merged_box.dimensions.x = (box1.dimensions.x + box2.dimensions.x) / 2.0
+        merged_box.dimensions.y = (box1.dimensions.y + box2.dimensions.y) / 2.0
+        merged_box.dimensions.z = (box1.dimensions.z + box2.dimensions.z) / 2.0
+        merged_box.label = box1.label  # Label from first box
+
+    # Always use max score for the merged box confidence value
+    merged_box.value = max(score1, score2)
+
+    return merged_box
+
+
+def get_aabb_corners(box: BoundingBox):
+    """
+    Get axis-aligned bounding box corners for IoU calculation.
+    Axis-aligned boxes are used, so rotation is not considered.
+
+    Returns:
+        min_x, max_x, min_y, max_y, min_z, max_z coordinates
+    """
+    # Extract position and dimensions from each box
+    cx, cy, cz = box.pose.position.x, box.pose.position.y, box.pose.position.z
+    l, w, h = box.dimensions.x, box.dimensions.y, box.dimensions.z
+
+    # NOTE: this treats pose.position as the box's minimum corner. If the
+    # detectors publish center-based poses (the usual jsk BoundingBox
+    # convention), the corners are shifted by half the dimensions; matching
+    # still works because both inputs are treated identically, but the IoU
+    # is then only approximate.
+    # min_x, max_x, min_y, max_y, min_z, max_z
+    return cx, cx + l, cy, cy + w, cz, cz + h
+
+
+def get_volume(box):
+    """Calculate the volume of a bounding box."""
+    return box.dimensions.x * box.dimensions.y * box.dimensions.z
+
+
+def get_bev_aabb_corners(box: BoundingBox):
+    """
+    Get axis-aligned bounding box corners for 2D Bird's Eye View IoU calculation
+    (same minimum-corner convention as get_aabb_corners above).
+    Returns:
+        min_x, max_x, min_y, max_y
+    """
+    cx, cy = box.pose.position.x, box.pose.position.y
+    l, w = box.dimensions.x, box.dimensions.y
+
+    return cx, cx + l, cy, cy + w
+
+
+def calculate_bev_iou(box1: BoundingBox, box2: BoundingBox):
+    """
+    Calculates the 2D Bird's Eye View IoU between two bounding boxes.
+    Ignores the z-axis and yaw (assumes axis-aligned bounding boxes).
+    """
+    min_x1, max_x1, min_y1, max_y1 = get_bev_aabb_corners(box1)
+    min_x2, max_x2, min_y2, max_y2 = get_bev_aabb_corners(box2)
+
+    # Calculate intersection in BEV
+    inter_min_x = max(min_x1, min_x2)
+    inter_max_x = min(max_x1, max_x2)
+    inter_min_y = max(min_y1, min_y2)
+    inter_max_y = min(max_y1, max_y2)
+
+    inter_w = max(0, inter_max_x - inter_min_x)
+    inter_h = max(0, inter_max_y - inter_min_y)
+    intersection_area = inter_w * inter_h
+
+    # Calculate union area
+    area1 = max(box1.dimensions.x * box1.dimensions.y, 1e-6)
+    area2 = max(box2.dimensions.x * box2.dimensions.y, 1e-6)
+    union_area = max(area1 + area2 - intersection_area, 1e-6)
+
+    iou = intersection_area / union_area
+    return max(0.0, min(iou, 1.0))  # Clamp to [0, 1]
+
+
+class CombinedDetector3D(Component):
+    """
+    Combines detections from multiple 3D object detectors (YOLO and PointPillars).
+
+    This component subscribes to bounding box outputs from YOLO and PointPillars,
+    fuses overlapping detections, and outputs a unified set of 3D bounding boxes.
+    """
+
+    def __init__(
+        self,
+        vehicle_interface: GEMInterface,
+        enable_tracking: bool = True,
+        use_start_frame: bool = True,
+        iou_threshold: float = 0.1,
+        merge_mode: str = "Average",
+        **kwargs
+    ):
+        self.vehicle_interface = vehicle_interface
+        self.tracked_agents: Dict[str, AgentState] = {}
+        self.ped_counter = 0
+        self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None
+        self.latest_pp_bbxs: Optional[BoundingBoxArray] = None
+        self.start_pose_abs: Optional[ObjectPose] = None
+        self.start_time: Optional[float] = None
+
+        self.enable_tracking = enable_tracking
+        self.use_start_frame = use_start_frame
+        self.iou_threshold = iou_threshold
+        self.merge_mode = merge_mode
+        self.merge_in_bev = (merge_mode == "BEV")
+
+        self.yolo_topic = '/yolo_boxes'
+        self.pp_topic = '/pointpillars_boxes'
+        self.debug = False
+
+        rospy.loginfo(f"CombinedDetector3D Initialized. 
Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")
+        rospy.loginfo(f"Using merge mode: {self.merge_mode}")
+
+    def rate(self) -> float:
+        return 8.0
+
+    def state_inputs(self) -> list:
+        return ['vehicle']
+
+    def state_outputs(self) -> list:
+        return ['agents']
+
+    def initialize(self):
+        """Initialize subscribers and publishers."""
+        self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
+        self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)
+        self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1)
+
+        queue_size = 10
+        slop = 0.1
+
+        self.sync = ApproximateTimeSynchronizer(
+            [self.yolo_sub, self.pp_sub],
+            queue_size=queue_size,
+            slop=slop
+        )
+        self.sync.registerCallback(self.synchronized_callback)
+        rospy.loginfo("CombinedDetector3D Subscribers Initialized.")
+
+    def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
+        """Callback for synchronized YOLO and PointPillars messages."""
+        self.latest_yolo_bbxs = yolo_bbxs_msg
+        self.latest_pp_bbxs = pp_bbxs_msg
+
+    def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
+        """Update function called by the GEMstack pipeline."""
+        current_time = self.vehicle_interface.time()
+        agents: Dict[str, AgentState] = {}
+
+        if self.start_time is None:
+            self.start_time = current_time
+
+        yolo_bbx_array = copy.deepcopy(self.latest_yolo_bbxs)
+        pp_bbx_array = copy.deepcopy(self.latest_pp_bbxs)
+
+        if yolo_bbx_array is None or pp_bbx_array is None:
+            return {}
+
+        original_header = yolo_bbx_array.header
+        fused_boxes_list = self._fuse_bounding_boxes(yolo_bbx_array, pp_bbx_array)
+        rospy.loginfo(f"Fused box count: {len(fused_boxes_list)}")
+
+        # Used to visualize the combined results in the current frame
+        fused_bb_array = BoundingBoxArray()
+        fused_bb_array.header = original_header
+
+        for box in fused_boxes_list:
+            fused_bb_array.boxes.append(box)
+
+            # Get position and orientation in the current vehicle frame.
+            # pos_* are plain floats; quat_* are the quaternion components.
+            pos_x = box.pose.position.x
+            pos_y = box.pose.position.y
+            pos_z = box.pose.position.z
+            quat_x = box.pose.orientation.x
+            quat_y = box.pose.orientation.y
+            quat_z = box.pose.orientation.z
+            quat_w = box.pose.orientation.w
+            yaw, pitch, roll = R.from_quat([quat_x, quat_y, quat_z, quat_w]).as_euler('zyx', degrees=False)
+
+            if self.use_start_frame:
+                out_frame = ObjectFrameEnum.START
+                # Get the starting vehicle pose
+                if self.start_pose_abs is None:
+                    self.start_pose_abs = vehicle.pose
+
+                # Convert to start frame
+                vehicle_start_pose = vehicle.pose.to_frame(
+                    ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs
+                )
+                # T_vehicle_to_start = pose_to_matrix(vehicle_start_pose)
+                T_vehicle_to_start = vehicle_start_pose.transform()
+                object_pose_current_h = np.array([[pos_x], [pos_y], [pos_z], [1.0]])
+                object_pose_start_h = T_vehicle_to_start @ object_pose_current_h
+                final_x, final_y, final_z = object_pose_start_h[:3, 0]
+            else:
+                out_frame = ObjectFrameEnum.CURRENT
+                final_x = pos_x
+                final_y = pos_y
+                final_z = pos_z
+
+            dims = (box.dimensions.x, box.dimensions.y, box.dimensions.z * 2.0)  # AgentState has z center on the floor and height is full height. 
+
+            # Shift z down by half the box height: AgentState expects the pose z
+            # at the agent's base rather than at the box's geometric center.
+            new_pose = ObjectPose(
+                t=current_time, x=final_x, y=final_y, z=final_z - box.dimensions.z / 2.0,
+                yaw=yaw, pitch=pitch, roll=roll, frame=out_frame
+            )
+
+            existing_id = match_existing_cone(
+                new_center=np.array([new_pose.x, new_pose.y, new_pose.z]),
+                new_dims=dims,
+                existing_agents=self.tracked_agents,
+                distance_threshold=2.0
+            )
+
+            if existing_id is not None:
+                old_state = self.tracked_agents[existing_id]
+                dt = new_pose.t - old_state.pose.t
+                vx, vy, vz = compute_velocity(old_state.pose, new_pose, dt)
+                updated_agent = AgentState(
+                    pose=new_pose,
+                    dimensions=dims,
+                    outline=None,
+                    type=AgentEnum.PEDESTRIAN,
+                    activity=AgentActivityEnum.MOVING,
+                    velocity=(vx, vy, vz),
+                    yaw_rate=0
+                )
+                agents[existing_id] = updated_agent
+                self.tracked_agents[existing_id] = updated_agent
+            else:
+                agent_id = f"Pedestrian{self.ped_counter}"
+                self.ped_counter += 1
+                new_agent = AgentState(
+                    pose=new_pose,
+                    dimensions=dims,
+                    outline=None,
+                    type=AgentEnum.PEDESTRIAN,
+                    activity=AgentActivityEnum.MOVING,
+                    velocity=(0, 0, 0),
+                    yaw_rate=0
+                )
+                agents[agent_id] = new_agent
+                self.tracked_agents[agent_id] = new_agent
+
+        self.pub_fused.publish(fused_bb_array)
+
+        stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items()
+                     if current_time - agent.pose.t > 5.0]
+        for agent_id in stale_ids:
+            rospy.loginfo(f"Removing stale agent: {agent_id}\n")
+            del self.tracked_agents[agent_id]
+        for agent_id, agent in agents.items():
+            p = agent.pose
+            # Format pose and velocity with 3 decimals (or as needed)
+            rospy.loginfo(
+                f"Agent ID: {agent_id}\n"
+                f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, "
+                f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n"
+                f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n"
+            )
+
+        return agents
+
+
+    def _fuse_bounding_boxes(self,
+                             yolo_bbx_array: BoundingBoxArray,
+                             pp_bbx_array: BoundingBoxArray,
+                             ):
+        """
+        Fuse bounding boxes from multiple detectors. 
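+        Matching is greedy: each YOLO box is paired with the unmatched
+        PointPillars box of highest IoU above `iou_threshold`; unmatched boxes
+        from either detector are kept as-is.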
+
+        Args:
+            yolo_bbx_array: Bounding boxes from YOLO detector
+            pp_bbx_array: Bounding boxes from PointPillars detector
+        """
+        yolo_boxes: List[BoundingBox] = yolo_bbx_array.boxes
+        pp_boxes: List[BoundingBox] = pp_bbx_array.boxes
+
+        matched_yolo_indices = set()
+        matched_pp_indices = set()
+        fused_boxes_list: List[BoundingBox] = []
+
+        # Match the boxes
+        # Can optimize from NxM loop
+        for i, yolo_box in enumerate(yolo_boxes):
+            best_match_j = -1
+            best_iou = -1.0
+            for j, pp_box in enumerate(pp_boxes):
+                if j in matched_pp_indices:  # Skip already matched PP boxes
+                    continue
+
+                # IoU in BEV or full 3D, depending on the configured merge mode
+                if self.merge_in_bev:
+                    iou = calculate_bev_iou(yolo_box, pp_box)
+                else:
+                    iou = calculate_3d_iou(yolo_box, pp_box, get_aabb_corners, get_volume)
+
+                if iou > self.iou_threshold and iou > best_iou:
+                    best_iou = iou
+                    best_match_j = j
+
+            if best_match_j != -1:
+                rospy.logdebug(f"Matched YOLO box {i} with PP box {best_match_j} (IoU: {best_iou:.3f})")
+                matched_yolo_indices.add(i)
+                matched_pp_indices.add(best_match_j)
+                rospy.logdebug(f"Using merge mode: {self.merge_mode} for boxes with scores {yolo_box.value:.2f} and {pp_boxes[best_match_j].value:.2f}")
+                merged = merge_boxes(yolo_box, pp_boxes[best_match_j], mode=self.merge_mode)
+                fused_boxes_list.append(merged)
+
+        # Add the unmatched YOLO boxes
+        for i, yolo_box in enumerate(yolo_boxes):
+            if i not in matched_yolo_indices:
+                fused_boxes_list.append(yolo_box)
+                rospy.logdebug(f"Kept unmatched YOLO box {i}")
+
+        # Add the unmatched PointPillars boxes
+        for j, pp_box in enumerate(pp_boxes):
+            if j not in matched_pp_indices:
+                fused_boxes_list.append(pp_box)
+                rospy.logdebug(f"Kept unmatched PP box {j}")
+
+        return fused_boxes_list
+
+
+# Fake 2D Combined Detector for testing purposes
+class FakeCombinedDetector2D(Component):
+    """Test detector that simulates cone detections at fixed time intervals."""
+
+    def __init__(self, vehicle_interface: GEMInterface):
+        self.vehicle_interface = vehicle_interface
+        self.times = [(5.0, 20.0), (30.0, 35.0)]
+        self.t_start = None
+
+    def rate(self):
+        return 4.0
+
+    def state_inputs(self):
+        return ['vehicle']
+
+    def state_outputs(self):
+        return ['agents']
+
+    def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
+        if self.t_start is None:
+            self.t_start = self.vehicle_interface.time()
+        t = self.vehicle_interface.time() - self.t_start
+        res = {}
+        for time_range in self.times:
+            if time_range[0] <= t <= time_range[1]:
+                res['cone0'] = box_to_fake_agent((0, 0, 0, 0))
+                rospy.loginfo("Detected a Cone (simulated)")
+        return res
+
+
+def box_to_fake_agent(box):
+    """Convert 2D bounding box to a fake agent state."""
+    x, y, w, h = box
+    pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT)
+    dims = (w, h, 0)
+    return AgentState(pose=pose, dimensions=dims, outline=None,
+                      type=AgentEnum.CONE, activity=AgentActivityEnum.MOVING,
+                      velocity=(0, 0, 0), yaw_rate=0)
+
+
+if __name__ == '__main__':
+    pass
diff --git a/GEMstack/onboard/perception/combined_detection_utils.py b/GEMstack/onboard/perception/combined_detection_utils.py
new file mode 100644
index 000000000..28673e1a5
--- /dev/null
+++ b/GEMstack/onboard/perception/combined_detection_utils.py
@@ -0,0 +1,36 @@
+from jsk_recognition_msgs.msg import BoundingBox
+from scipy.spatial.transform import Rotation as R
+
+# Separated the Sensor Fusion specific utilities into its own file because package imports are required
+# (to minimize integration problems for groups who aren't using sensor 
fusion) + +# Adds a new bounding box to a BoundingBoxArray +def add_bounding_box(boxes, frame_id, stamp, x, y, z, l, w, h, yaw, conf_score, label): + box_msg = BoundingBox() + box_msg.header.frame_id = frame_id + box_msg.header.stamp = stamp + + # Set the pose + box_msg.pose.position.x = float(x) + box_msg.pose.position.y = float(y) + box_msg.pose.position.z = float(z) + + # Convert yaw to quaternion + quat = R.from_euler('z', yaw).as_quat() + box_msg.pose.orientation.x = float(quat[0]) + box_msg.pose.orientation.y = float(quat[1]) + box_msg.pose.orientation.z = float(quat[2]) + box_msg.pose.orientation.w = float(quat[3]) + + # Set the dimensions + # Swapped dims[2] and dims[0] + box_msg.dimensions.x = float(l) # length + box_msg.dimensions.y = float(w) # width + box_msg.dimensions.z = float(h) # height + + # Add confidence score and label + box_msg.value = float(conf_score) + box_msg.label = label + + boxes.boxes.append(box_msg) + return boxes diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py index ab83d03df..e2d438ace 100644 --- a/GEMstack/onboard/perception/cone_detection.py +++ b/GEMstack/onboard/perception/cone_detection.py @@ -3,6 +3,7 @@ from ..interface.gem import GEMInterface from ..component import Component from .perception_utils import * +from .perception_utils_gem import * from ultralytics import YOLO import cv2 from typing import Dict @@ -87,6 +88,7 @@ def __init__( def rate(self) -> float: return 8 + def state_inputs(self) -> list: return ['vehicle'] @@ -94,6 +96,7 @@ def state_outputs(self) -> list: return ['obstacles'] def initialize(self): + # --- Determine the correct RGB topic for this camera --- rgb_topic_map = { 'front': '/oak/rgb/image_raw', @@ -166,6 +169,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: self.start_time = current_time time_elapsed = current_time - self.start_time + # Ensure data/ exists and build timestamp if self.save_data: self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) @@ -243,6 +247,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) cv2.imshow("Detection - Cone 2D", undistorted_img) + start = time.time() # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. 
# Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) @@ -272,6 +277,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: if roi_pts.shape[0] < 5: continue + points_3d = roi_pts[:, 2:5] points_3d = filter_points_within_threshold(points_3d, 40) @@ -361,7 +367,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, - collidable=True + collidable=False ) else: updated_obstacle = old_state @@ -376,7 +382,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, - collidable=True + collidable=False ) obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle @@ -389,7 +395,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, - collidable = True + collidable=False ) obstacles[obstacle_id] = new_obstacle @@ -502,8 +508,8 @@ def box_to_fake_obstacle(box): pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return Obstacle(pose=pose, dimensions=dims, outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING, collidable=True) + material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING) if __name__ == '__main__': - pass \ No newline at end of file + pass diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py new file mode 100644 index 000000000..8f1e41131 --- /dev/null +++ b/GEMstack/onboard/perception/pedestrian_detection.py @@ -0,0 +1,399 @@ +from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum +from ..interface.gem import GEMInterface +from ..component import Component +from .perception_utils import * # If you want to alias functions for clarity, do so in perception_utils +from .perception_utils_gem import * +from ultralytics import YOLO +from typing import Dict +import open3d as o3d +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os +import yaml + + +class PedestrianDetector3D(Component): + """ + Detects pedestrians by fusing YOLO 2D detections with LiDAR point cloud data. + Tracking is optional: `enable_tracking=False` returns only current-frame detections. + Calculates and outputs each pedestrian's velocity and yaw rate. 
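+    Pipeline: YOLO 2D boxes -> LiDAR points projected into the image ->
+    per-box ROI filtering -> oriented 3D box fit (Open3D) -> vehicle/START frame.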
+ """ + + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + T_l2v: list = None, + save_data: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.pedestrian_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Configuration + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.use_start_frame = use_start_frame + + # 1) Load LiDAR-to-vehicle transform + self.T_l2v = np.array(T_l2v) if T_l2v is not None else np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0.0, 0.0, 0.0, 1.0] + ]) + + # 2) Load camera intrinsics/extrinsics from YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name == 'front') + + def rate(self) -> float: + return 8.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + # Subscribe to RGB & LiDAR streams + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + } + rgb_topic = rgb_topic_map.get(self.camera_name, + f'/{self.camera_name}/rgb/image_raw') + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer( + [self.rgb_sub, self.lidar_sub], queue_size=500, slop=0.025 + ) + self.sync.registerCallback(self.synchronized_callback) + + # Initialize YOLO pedestrian detection model (COCO class 0) + self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') + self.detector.to('cuda') + + # Compute camera-to-LiDAR transform + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr(f"Failed to convert image: {e}") + self.latest_image = None + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap( + K, D, None, newK, (w, h), cv2.CV_32FC1 + ) + undistorted = cv2.remap( + image, self.undistort_map1, self.undistort_map2, + interpolation=cv2.INTER_NEAREST + ) + return undistorted, newK + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + # Ensure both image and LiDAR data are available + if self.latest_image is None or self.latest_lidar is None: + return {} + + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + + # 
Optionally save raw sensor data + if self.save_data: + self.save_sensor_data(vehicle, self.latest_image) + + # Undistort image if needed + if not self.camera_front: + img, self.current_K = self.undistort_image( + self.latest_image, self.K, self.D) + else: + img = self.latest_image.copy() + self.current_K = self.K + + # Perform 2D detection + results = self.detector(img, conf=0.35, classes=[0]) + boxes2d = (np.array(results[0].boxes.xywh.cpu()) + if len(results) > 0 else []) + + # Project LiDAR points into image plane + lidar_pts = self.latest_lidar.copy() + pts_cam = transform_points_l2c(lidar_pts, self.T_l2c) + projected = project_points(pts_cam, self.current_K, lidar_pts) + + agents: Dict[str, AgentState] = {} + for (cx, cy, w, h) in boxes2d: + # print(cx, cy, w, h) + # Define ROI in image for each detection + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = ((projected[:, 0] >= left) & (projected[:, 0] <= right) & + (projected[:, 1] >= top) & (projected[:, 1] <= bottom)) + roi = projected[mask] + if roi.shape[0] < 5: + continue + + # Filter point cloud + pts3d = roi[:, 2:5] + pts3d = filter_points_within_threshold(pts3d, 40) + pts3d = remove_ground_by_min_range(pts3d, z_range=0.08) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.5) + + if self.use_cyl_roi: + glob = filter_points_within_threshold(lidar_pts, 30) + cyl = cylindrical_roi(glob, np.mean(pts3d, axis=0), radius=0.5, height=2.0) + pts3d = remove_ground_by_min_range(cyl, z_range=0.01) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.3) + + if pts3d.shape[0] < 4: + continue + + # Fit oriented bounding box + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts3d) + obb = pcd.get_oriented_bounding_box() + center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R + + # Transform to vehicle frame + c_hom = np.append(center, 1) + veh_hom = self.T_l2v @ c_hom + veh_center = veh_hom[:3] + + # Compute orientation + R_veh = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_veh).as_euler('zyx', degrees=False) + + # Convert to START or CURRENT frame + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs) + T_vs = pose_to_matrix(start_pose) + xp, yp, zp = (T_vs @ np.append(veh_center, 1))[:3] + frame = ObjectFrameEnum.START + else: + xp, yp, zp = veh_center + frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=frame + ) + + # Matching and tracking + if self.enable_tracking: + existing = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, self.tracked_agents, + distance_threshold=1.0 + ) + if existing is not None: + old = self.tracked_agents[existing] + dt = max(new_pose.t - old.pose.t, 1e-3) + # Instantaneous velocity + vx = (new_pose.x - old.pose.x) / dt + vy = (new_pose.y - old.pose.y) / dt + vz = (new_pose.z - old.pose.z) / dt + # Yaw rate + yaw_rate = (new_pose.yaw - old.pose.yaw) / dt + speed = np.linalg.norm([vx, vy, vz]) + activity = (AgentActivityEnum.MOVING + if speed > 0.1 else AgentActivityEnum.STOPPED) + + updated = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=activity, + velocity=(vx, vy, vz), + yaw_rate=yaw_rate + ) + agents[existing] = updated + self.tracked_agents[existing] = updated + + else: + aid = 
f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + yaw_rate=0 + ) + agents[aid] = new_agent + self.tracked_agents[aid] = new_agent + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + else: + aid = f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + agents[aid] = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + self.current_agents = agents + + # Remove stale tracked agents + if self.enable_tracking: + stale = [ + aid for aid, a in self.tracked_agents.items() + if current_time - a.pose.t > 10.0 + ] + for aid in stale: + del self.tracked_agents[aid] + return self.tracked_agents + + return self.current_agents + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + t = int(self.vehicle_interface.time() * 1000) + cv2.imwrite(f"data/{t}_image.png", latest_image) + np.savez(f"data/{t}_lidar.npz", lidar=self.latest_lidar) + # Write BEFORE_TRANSFORM state + with open(f"data/{t}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute start or current frame state + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{t}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + +# ----- Fake Cone Detector 2D (for Testing Purposes) ----- +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for time_range in self.times: + if time_range[0] <= t <= time_range[1]: + res['Pedestrian0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Pedestrian (simulated)") + return res + + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose( + t=0, x=x + w / 2, y=y + h / 2, z=0, + yaw=0, pitch=0, roll=0, + frame=ObjectFrameEnum.CURRENT + ) + dims = (w, h, 0) + return AgentState( + pose=pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, 
+ activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + +if __name__ == '__main__': + pass diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py index a95172d2f..fd68e43f3 100644 --- a/GEMstack/onboard/perception/perception_utils.py +++ b/GEMstack/onboard/perception/perception_utils.py @@ -1,4 +1,3 @@ -from ...state import ObjectPose, AgentState from typing import Dict import open3d as o3d import numpy as np @@ -7,6 +6,7 @@ from sensor_msgs.msg import PointCloud2 import sensor_msgs.point_cloud2 as pc2 import ros_numpy +import math # ----- Helper Functions ----- @@ -23,37 +23,6 @@ def filter_points_within_threshold(points, threshold=15.0): mask = distances <= threshold return points[mask] -def match_existing_cone( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing Cone agent within a specified distance threshold. - """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) - def extract_roi_box(lidar_pc, center, half_extents): """ @@ -224,3 +193,27 @@ def project_points(pts_cam, K, original_lidar_points): v = (K[1, 1] * (Yc / Zc) + K[1, 2]).astype(np.int32) proj = np.column_stack((u, v, lidar_valid)) return proj + +def pose_to_matrix(pose): + """ + Compose a 4x4 transformation matrix from a pose state. + Assumes pose has attributes: x, y, z, yaw, pitch, roll, + where the angles are given in degrees. + """ + # Use default values if any are None (e.g. if the car is not moving) + x = pose.x if pose.x is not None else 0.0 + y = pose.y if pose.y is not None else 0.0 + z = pose.z if pose.z is not None else 0.0 + if pose.yaw is not None and pose.pitch is not None and pose.roll is not None: + yaw = math.radians(pose.yaw) + pitch = math.radians(pose.pitch) + roll = math.radians(pose.roll) + else: + yaw = 0.0 + pitch = 0.0 + roll = 0.0 + R_mat = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix() + T = np.eye(4) + T[:3, :3] = R_mat + T[:3, 3] = np.array([x, y, z]) + return T \ No newline at end of file diff --git a/GEMstack/onboard/perception/perception_utils_gem.py b/GEMstack/onboard/perception/perception_utils_gem.py new file mode 100644 index 000000000..e68e7db0c --- /dev/null +++ b/GEMstack/onboard/perception/perception_utils_gem.py @@ -0,0 +1,35 @@ +from ...state import ObjectPose, AgentState +from typing import Dict +import numpy as np +# Had to move these functions to a seperate file because they use relative imports + + +def match_existing_cone( + new_center: np.ndarray, + new_dims: tuple, + existing_agents: Dict[str, AgentState], + distance_threshold: float = 1.0 +) -> str: + """ + Find the closest existing Cone agent within a specified distance threshold. 
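+
+    Args:
+        new_center: (3,) array with the candidate detection's center position.
+        new_dims: (l, w, h) of the candidate (currently unused by the matcher).
+        existing_agents: mapping of agent ID -> AgentState to search.
+        distance_threshold: maximum center distance (in meters) for a match.
+
+    Returns:
+        The matched agent ID, or None if no agent is within the threshold.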
+ """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) \ No newline at end of file diff --git a/GEMstack/onboard/perception/person_detector.py b/GEMstack/onboard/perception/person_detector.py new file mode 100644 index 000000000..8f4fa3588 --- /dev/null +++ b/GEMstack/onboard/perception/person_detector.py @@ -0,0 +1,48 @@ +#from ultralytics import YOLO +import cv2 +import sys + +def person_detector(img : cv2.Mat): + #TODO: implement me to produce a list of (x,y,w,h) bounding boxes of people in the image + return [] + +def main(fn): + image = cv2.imread(fn) + bboxes = person_detector(image) + print("Detected",len(bboxes),"people") + for bb in bboxes: + x,y,w,h = bb + if not isinstance(x,(int,float)) or not isinstance(y,(int,float)) or not isinstance(w,(int,float)) or not isinstance(h,(int,float)): + print("WARNING: make sure to return Python numbers rather than PyTorch Tensors") + print("Corner",(x,y),"size",(w,h)) + cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3) + cv2.imshow('Results', image) + cv2.waitKey(0) + +def main_webcam(): + cap = cv2.VideoCapture(0) + cap.set(3, 640) + cap.set(4, 480) + + print("Press space to exit") + while True: + _, image = cap.read() + + bboxes = person_detector(image) + for bb in bboxes: + x,y,w,h = bb + cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3) + + cv2.imshow('Person detection', image) + if cv2.waitKey(1) & 0xFF == ord(' '): + break + + cap.release() + + +if __name__ == '__main__': + fn = sys.argv[1] + if fn != 'webcam': + main(fn) + else: + main_webcam() \ No newline at end of file diff --git a/GEMstack/onboard/perception/point_pillars_node.py b/GEMstack/onboard/perception/point_pillars_node.py new file mode 100644 index 000000000..878132b44 --- /dev/null +++ b/GEMstack/onboard/perception/point_pillars_node.py @@ -0,0 +1,221 @@ +from combined_detection_utils import add_bounding_box +import numpy as np +from scipy.spatial.transform import Rotation as R +import rospy +from sensor_msgs.msg import PointCloud2, Image +from message_filters import Subscriber, ApproximateTimeSynchronizer +from cv_bridge import CvBridge +import time +import os +import ros_numpy + +# PointPillars imports: +import torch +from pointpillars.model import PointPillars + +# Publisher imports: +from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray +from geometry_msgs.msg import Pose, Vector3 + + +def pc2_to_numpy_with_intensity(pc2_msg, want_rgb=False): + """ + Convert a ROS PointCloud2 message into a numpy array quickly using ros_numpy. + This function extracts the x, y, z coordinates and reflecivity from the point cloud. 
+ """ + # Convert the ROS message to a numpy structured array + pc = ros_numpy.point_cloud2.pointcloud2_to_array(pc2_msg) + reflectivity = np.array(pc['reflectivity']).ravel() + intensity = np.array(pc['intensity']).ravel() + + # Normalize reflectivity to 0-1 range + normalized_reflectivity = reflectivity / 255.0 + + # Stack x,y,z, r fields to a (N,4) array + pts = np.stack((np.array(pc['x']).ravel(), + np.array(pc['y']).ravel(), + np.array(pc['z']).ravel(), + normalized_reflectivity), + axis=1) + + # Restrict points to the model's default range: + x_min, y_min, z_min = 0, -39.68, -3 + x_max, y_max, z_max = 69.12, 39.68, 1 + + mask = ( + (pts[:, 0] >= x_min) & (pts[:, 0] <= x_max) & + (pts[:, 1] >= y_min) & (pts[:, 1] <= y_max) & + (pts[:, 2] >= z_min) & (pts[:, 2] <= z_max) + ) + return pts[mask] + + +class PointPillarsNode: + """ + Detects Pedestrians with PointPillars and publishes the results in vehicle frame. + """ + + def __init__(self): + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.camera_name = 'front' + self.camera_front = (self.camera_name == 'front') + self.score_threshold = 0.4 + self.debug = True + self.initialize() + + def initialize(self): + """Initialize the PointPillars node with model and ROS connections.""" + # Determine the correct RGB topic for this camera + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + # add additional camera mappings here if needed + } + rgb_topic = rgb_topic_map.get( + self.camera_name, + f'/{self.camera_name}/rgb/image_raw' + ) + + # Initialize PointPillars node + rospy.init_node('pointpillars_box_publisher') + + # Create bounding box publisher + self.pub = rospy.Publisher('/pointpillars_boxes', BoundingBoxArray, queue_size=1) + rospy.loginfo("PointPillars node initialized and waiting for messages.") + + # Initialize PointPillars + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + self.pointpillars = PointPillars( + nclasses=3, + voxel_size=[0.16, 0.16, 4], + point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], + max_num_points=32, + max_voxels=(16000, 40000) + ) + self.pointpillars.to(device) + + # Load model weights + model_path = 'epoch_160.pth' + checkpoint = torch.load(model_path) + self.pointpillars.load_state_dict(checkpoint) + self.pointpillars.eval() + rospy.loginfo("PointPillars model loaded successfully") + + # Transformation matrix from LiDAR to vehicle frame + self.T_l2v = np.array([ + [0.99939639, 0.02547917, 0.023615, 1.1], + [-0.02530848, 0.99965156, -0.00749882, 0.03773583], + [-0.02379784, 0.00689664, 0.999693, 1.95320223], + [0., 0., 0., 1.] 
+ ]) + + # Subscribe to the RGB and LiDAR streams + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer( + [self.rgb_sub, self.lidar_sub], + queue_size=10, + slop=0.1 + ) + self.sync.registerCallback(self.synchronized_callback) + + def synchronized_callback(self, image_msg, lidar_msg): + """Process synchronized RGB and LiDAR messages to detect pedestrians.""" + rospy.loginfo("Received synchronized RGB and LiDAR messages") + + # Convert LiDAR message to numpy array + self.latest_lidar = pc2_to_numpy_with_intensity(lidar_msg, want_rgb=False) + + downsample = False + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + # Create empty list of bounding boxes to fill + boxes = BoundingBoxArray() + boxes.header.frame_id = 'currentVehicleFrame' + boxes.header.stamp = lidar_msg.header.stamp + + with torch.no_grad(): + # Convert to tensor and format for PointPillars + lidar_tensor = torch.from_numpy(lidar_down).float() + if torch.cuda.is_available(): + lidar_tensor = lidar_tensor.cuda() + + # Format as batch with a single point cloud + batched_pts = [lidar_tensor] + + # Get PointPillars predictions + results = self.pointpillars.forward(batched_pts, mode='test') + + if results and len(results) > 0 and 'lidar_bboxes' in results[0]: + # Process PointPillars results + pp_bboxes = results[0]['lidar_bboxes'] + pp_labels = results[0]['labels'] + pp_scores = results[0]['scores'] + + for i, (bbox, label, score) in enumerate(zip(pp_bboxes, pp_labels, pp_scores)): + # Skip low confidence detections and non-pedestrians (assuming class is 0) + if (score < self.score_threshold) or (label != 0): + continue + + # Extract center position and dimensions + x, y, z, l, w, h, yaw = bbox + + # Transform from LiDAR to vehicle coordinates + center_lidar = np.array([x, y, z, 1.0]) + center_vehicle = self.T_l2v @ center_lidar + x_vehicle, y_vehicle, z_vehicle = center_vehicle[:3] + + # Transform rotation from LiDAR to vehicle frame + R_lidar = R.from_euler('z', yaw).as_matrix() + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + vehicle_yaw, vehicle_pitch, vehicle_roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + + # Modify bounding box so part of it is longer below ground level + bottom = z_vehicle - h / 2.0 + if bottom < 0.0: + # Cut the box at ground level: + bottom = 0.0 + top = z_vehicle + h / 2.0 + + # Or Shift the box vertically to make it sit at ground level: + # top = z_vehicle + h / 2.0 + # top = top + abs(bottom) + # bottom = 0.0 + + z_vehicle = (top - bottom) / 2.0 + h = abs(top - z_vehicle) + + # Add the bounding box + boxes = add_bounding_box( + boxes=boxes, + frame_id='currentVehicleFrame', + stamp=lidar_msg.header.stamp, + x=x_vehicle, + y=y_vehicle, + z=z_vehicle, + l=l, # length + w=w, # width + h=h, # height + yaw=vehicle_yaw, + conf_score=score, + label=label # person/pedestrian class + ) + + rospy.loginfo(f"Pedestrian detected at ({x:.2f}, {y:.2f}, {z:.2f}) with score {score:.2f}") + + # Publish the bounding boxes + rospy.loginfo(f"Publishing {len(boxes.boxes)} pedestrian bounding boxes") + self.pub.publish(boxes) + + +if __name__ == '__main__': + try: + node = PointPillarsNode() + rospy.spin() + except rospy.ROSInterruptException: + pass diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py b/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py new file mode 100644 index 
000000000..98b12e804 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py @@ -0,0 +1,976 @@ +import argparse +import os +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from collections import defaultdict +import math +import warnings +import traceback +import csv + +######## Bbox coord mapping +IDX_X, IDX_Y, IDX_Z = 0, 1, 2 +IDX_L, IDX_W, IDX_H = 3, 4, 5 +IDX_YAW = 6 +IDX_CLASS = 7 +IDX_SCORE = 8 + +######## Helper Functions + +def parse_box_line(line, is_gt=False): + """ + Parses a line from a KITTI format label file. + Handles the conversion from KITTI's y-coordinate (bottom of box) + to the geometric center y used internally. + """ + parts = line.strip().split() + try: + # Parse KITTI format: + # type truncated occluded alpha bbox_2d(4) dims(3) loc(3) rotation_y (score) + # 0 1 2 3 4-7 8-10 11-13 14 (15) + + # Check minimum length for standard KITTI format (15 fields without score) + if len(parts) < 15: + raise ValueError(f"Line does not have enough parts for KITTI format (expected >= 15, got {len(parts)})") + + class_label = parts[0] + + # Read dimensions (h, w, l) from KITTI standard indices 8, 9, 10 + h = float(parts[8]) + w = float(parts[9]) + l = float(parts[10]) + + # Read location (x, y_bottom, z) from KITTI standard indices 11, 12, 13 + loc_x = float(parts[11]) + loc_y_bottom = float(parts[12]) # KITTI 'y' is bottom of the box in camera coords + loc_z = float(parts[13]) + + # Read rotation_y (yaw) from KITTI standard index 14 + rot_y = float(parts[14]) + + # Convert to internal format + # Internal format: [cx, cy, cz, l, w, h, yaw, class_label, score] + + # Use KITTI loc_x and loc_z directly for internal cx, cz + cx = loc_x + cz = loc_z + + # Convert KITTI y (bottom of box) to geometric center y for internal use. 
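+        # For example, a 1.8 m tall pedestrian labeled with KITTI loc_y = 1.65
+        # (box bottom) gets internal cy = 1.65 - 0.9 = 0.75.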
+        # In KITTI camera coords, Y points down, so the center is 'above' the bottom (numerically smaller y): loc_y - h / 2
+        cy = loc_y_bottom - (h / 2.0)
+
+        # Use KITTI rot_y directly for internal yaw
+        yaw = rot_y
+
+        # Create the standardized box list [cx, cy, cz, l, w, h, yaw, class_label]
+        # Note the internal dimension order (l, w, h) differs from KITTI input order (h, w, l)
+        box = [cx, cy, cz, l, w, h, yaw, class_label]
+
+        # Append score if it's a prediction file (standard KITTI index 15)
+        if not is_gt:
+            if len(parts) > 15:
+                score = float(parts[15])
+            else:
+                # Handle missing score in prediction file if needed
+                score = 1.0  # Assigning default 1.0 as before
+                warnings.warn(f"[LOG] Prediction line missing score (field 16), assigning default 1.0: {line.strip()}")
+            box.append(score)  # Append score using IDX_SCORE implicitly
+
+        return box
+
+    except ValueError as ve:
+        print(f"[LOG] Error parsing line: '{line.strip()}' - {ve}")
+        print("[LOG] Does the input follow KITTI format?")
+        return None
+    except Exception as e:
+        print(f"[LOG] Unexpected error parsing line: '{line.strip()}' - {e}")
+        traceback.print_exc()  # Print detailed traceback for unexpected errors
+        return None
+
+
+def load_labels(file_path, is_gt=False):
+    """Loads ground truth or prediction boxes from a KITTI-format file."""
+    boxes = defaultdict(list)
+    if not os.path.exists(file_path):
+        # Reduced verbosity for missing prediction files (common)
+        # print(f"[LOG] File not found {file_path}, returning empty dict.")
+        return boxes
+    try:
+        with open(file_path, 'r') as f:
+            for i, line in enumerate(f):
+                box = parse_box_line(line, is_gt=is_gt)
+                if box:
+                    class_label = box[IDX_CLASS]
+                    boxes[class_label].append(box)
+                else:
+                    print(f"[LOG] Skipping invalid line {i+1} in {file_path}")
+    except Exception as e:
+        print(f"[LOG] Error reading file {file_path}: {e}")
+        traceback.print_exc()
+    return boxes
+
+def get_aabb_corners(box):
+    # Uses internal format where cy is geometric center
+    cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H]
+    # Ignores yaw
+    min_x, max_x = cx - l / 2, cx + l / 2
+    min_y, max_y = cy - h / 2, cy + h / 2  # Uses geometric center cy
+    min_z, max_z = cz - w / 2, cz + w / 2  # Assuming cz is center z (depth)
+    return min_x, max_x, min_y, max_y, min_z, max_z
+
+def get_volume(box):
+    return box[IDX_L] * box[IDX_W] * box[IDX_H]
+
+def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb):
+    """
+    Calculates the 3D Intersection over Union (IoU) between two bounding boxes.
+
+    Args:
+        box1, box2: List or tuple representing a 3D bounding box in the
+                    *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...]
+                    where cy is the geometric center y.
+        get_corners_cb: Callback function to extract AABB corners from a box
+        get_volume_cb: Callback function to calculate volume of a box
+
+    Returns:
+        float: The 3D IoU value.
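+
+    Worked example: two axis-aligned unit cubes whose centers are offset
+    by 0.5 along x intersect in a 0.5 volume, so
+    IoU = 0.5 / (1.0 + 1.0 - 0.5) = 1/3.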
+ + Doesn't consider yaw + """ + + ######### Simple Axis-Aligned Bounding Box (AABB) IoU# + min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1) + min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2) + + # Calculate intersection volume + inter_min_x = max(min_x1, min_x2) + inter_max_x = min(max_x1, max_x2) + inter_min_y = max(min_y1, min_y2) + inter_max_y = min(max_y1, max_y2) + inter_min_z = max(min_z1, min_z2) + inter_max_z = min(max_z1, max_z2) + + inter_l = max(0, inter_max_x - inter_min_x) + inter_h = max(0, inter_max_y - inter_min_y) + inter_w = max(0, inter_max_z - inter_min_z) + intersection_volume = inter_l * inter_h * inter_w + + # Calculate union volume + vol1 = get_volume_cb(box1) + vol2 = get_volume_cb(box2) + # Ensure volumes are positive and non-zero for stable IoU + vol1 = max(vol1, 1e-6) + vol2 = max(vol2, 1e-6) + union_volume = vol1 + vol2 - intersection_volume + union_volume = max(union_volume, 1e-6) # Avoid division by zero or very small numbers + + iou = intersection_volume / union_volume + # Clamp IoU to [0, 1] range + iou = max(0.0, min(iou, 1.0)) + return iou + + +def calculate_ap(precision, recall): + """Calculates Average Precision using the PASCAL VOC method (area under monotonic PR curve).""" + # Convert to numpy arrays first for safety + if not isinstance(precision, (list, np.ndarray)) or not isinstance(recall, (list, np.ndarray)): + return 0.0 + precision = np.array(precision) + recall = np.array(recall) + + if precision.size == 0 or recall.size == 0: + return 0.0 + + # Prepend/Append points for interpolation boundaries + recall = np.concatenate(([0.], recall, [1.0])) + precision = np.concatenate(([0.], precision, [0.])) # Start with 0 precision at recall 0, end with 0 at recall 1 + + # Make precision monotonically decreasing (handles PR curve 'jiggles') + for i in range(len(precision) - 2, -1, -1): + precision[i] = max(precision[i], precision[i+1]) + + # Find indices where recall changes (avoids redundant calculations) + indices = np.where(recall[1:] != recall[:-1])[0] + 1 + + # Compute AP using the area under the curve (sum of rectangle areas) + ap = np.sum((recall[indices] - recall[indices-1]) * precision[indices]) + return ap + +def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold, confidence_thresholds=None): + """Evaluates a single detector's predictions against ground truth. 
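+
+    Matching is greedy: predictions are sorted by confidence (descending) and
+    each is matched to the highest-IoU ground-truth box of the same class.
+    A ground-truth box can be matched at most once per sample, and any
+    prediction left unmatched at the IoU threshold counts as a false positive.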
+ + Args: + gt_boxes_all_samples: Ground truth boxes dictionary by sample_id and class + pred_boxes_all_samples: Prediction boxes dictionary by sample_id and class + classes: List of classes to evaluate + iou_threshold: IoU threshold for matching predictions to ground truth + confidence_thresholds: List of confidence thresholds to use for evaluation + If None, uses [0.0] (all predictions) + + Returns: + Dictionary of results by class + """ + # Default to single threshold that includes all predictions + if confidence_thresholds is None: + confidence_thresholds = [0.0] + + results_by_class = {} + sample_ids = list(gt_boxes_all_samples.keys()) # Get fixed order of sample IDs + + for cls in classes: + all_pred_boxes_cls = [] + num_gt_cls = 0 + pred_sample_indices = [] # Store index from sample_ids for each prediction + pred_scores = [] # Store confidence scores for each prediction + + # Collect all GTs and Preds for this class across samples + for i, sample_id in enumerate(sample_ids): + # Use .get() with default empty dict/list for safety + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + pred_boxes = pred_boxes_all_samples.get(sample_id, {}).get(cls, []) + + num_gt_cls += len(gt_boxes) + for box in pred_boxes: + all_pred_boxes_cls.append(box) + pred_sample_indices.append(i) # Store the original sample index + # Extract score safely + if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): + pred_scores.append(box[IDX_SCORE]) + else: + pred_scores.append(0.0) # Default score if missing + + if not all_pred_boxes_cls: # Handle case with no predictions for this class + results_by_class[cls] = { + 'precision': np.array([]), # Use empty numpy arrays + 'recall': np.array([]), + 'ap': 0.0, + 'num_gt': num_gt_cls, + 'num_pred': 0, + 'confusion_matrix': {}, # Empty confusion matrix + 'thresholds': confidence_thresholds + } + continue # Skip to next class + + # Sort all predictions by score (descending) + sorted_indices = np.argsort(pred_scores)[::-1] + + # Reorder the lists based on sorted scores + all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in sorted_indices] + pred_sample_indices = [pred_sample_indices[i] for i in sorted_indices] + pred_scores = [pred_scores[i] for i in sorted_indices] + + # Store results for each confidence threshold + threshold_results = {} + + # For each confidence threshold + for threshold in confidence_thresholds: + # Filter predictions above threshold + threshold_indices = [i for i, score in enumerate(pred_scores) if score >= threshold] + if not threshold_indices: + # No predictions above this threshold + threshold_results[threshold] = { + 'precision': np.array([]), + 'recall': np.array([]), + 'ap': 0.0, + 'tp': np.array([]), + 'fp': np.array([]), + 'tn': 0, # TN doesn't apply directly to object detection (no explicit negatives) + 'fn': num_gt_cls, # All GT boxes are missed + 'confusion_matrix': { + 'TP': 0, + 'FP': 0, + 'FN': num_gt_cls, + 'TN': 0 # Included for completeness + } + } + continue + + # Get predictions above this threshold + filtered_pred_boxes = [all_pred_boxes_cls[i] for i in threshold_indices] + filtered_sample_indices = [pred_sample_indices[i] for i in threshold_indices] + filtered_scores = [pred_scores[i] for i in threshold_indices] + + tp = np.zeros(len(filtered_pred_boxes)) + fp = np.zeros(len(filtered_pred_boxes)) + # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False + gt_matched = defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx + + # Match predictions + for 
det_idx, pred_box in enumerate(filtered_pred_boxes): + sample_idx = filtered_sample_indices[det_idx] + sample_id = sample_ids[sample_idx] + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + + best_iou = -1.0 + best_gt_idx = -1 + + if not gt_boxes: # No GT for this class in this specific sample + fp[det_idx] = 1 + continue + + for gt_idx, gt_box in enumerate(gt_boxes): + # Explicitly check class match (belt-and-suspenders) + if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: + iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners, get_volume) + if iou > best_iou: + best_iou = iou + best_gt_idx = gt_idx + + if best_iou >= iou_threshold: + # Check if this GT box was already matched *in this sample* + if not gt_matched[sample_idx].get(best_gt_idx, False): + tp[det_idx] = 1 + gt_matched[sample_idx][best_gt_idx] = True # Mark as matched + else: + fp[det_idx] = 1 # Already matched by higher-scored prediction + else: + fp[det_idx] = 1 # No match with sufficient IoU + + # Calculate confusion matrix metrics + total_tp = int(np.sum(tp)) + total_fp = int(np.sum(fp)) + + # Count false negatives (GT boxes that weren't matched) + total_fn = num_gt_cls - sum(len(matched) for matched in gt_matched.values()) + # True negatives don't apply well to object detection, but included for completeness + total_tn = 0 # Placeholder + + # Calculate precision/recall + fp_cumsum = np.cumsum(fp) + tp_cumsum = np.cumsum(tp) + + # Avoid division by zero if num_gt_cls is 0 + recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) + + # Avoid division by zero if no predictions were made or matched (tp + fp = 0) + denominator = tp_cumsum + fp_cumsum + precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) + + ap = calculate_ap(precision, recall) + + # Store results for this threshold + threshold_results[threshold] = { + 'precision': precision, + 'recall': recall, + 'ap': ap, + 'tp': tp, + 'fp': fp, + 'fn': total_fn, + 'tn': total_tn, + 'confusion_matrix': { + 'TP': total_tp, + 'FP': total_fp, + 'FN': total_fn, + 'TN': total_tn + }, + 'scores': filtered_scores + } + + # Find best AP score across thresholds + best_threshold = max(threshold_results.keys(), + key=lambda t: threshold_results[t]['ap'] if not np.isnan(threshold_results[t]['ap']) else 0.0) + best_result = threshold_results[best_threshold] + + # Store overall results for this class + results_by_class[cls] = { + 'precision': best_result['precision'], + 'recall': best_result['recall'], + 'ap': best_result['ap'], + 'num_gt': num_gt_cls, + 'num_pred': len(all_pred_boxes_cls), + 'best_threshold': best_threshold, + 'thresholds': confidence_thresholds, + 'threshold_results': threshold_results, + 'confusion_matrix': best_result['confusion_matrix'] + } + + return results_by_class + + +def plot_pr_curves(results_all_detectors, classes, output_dir): + """ + Plots Precision-Recall curves for each class. + + Creates: + 1. Overall PR curves comparing all detectors at their best threshold + 2. 
PR curves for each detector showing performance at different confidence thresholds + """ + if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except OSError as e: + print(f"[LOG] Error creating output directory {output_dir} for plots: {e}") + return # Cannot save plots + + pr_curves_dir = os.path.join(output_dir, 'pr_curves') + if not os.path.exists(pr_curves_dir): + try: + os.makedirs(pr_curves_dir) + except OSError as e: + print(f"[LOG] Error creating PR curves directory {pr_curves_dir}: {e}") + return + + detector_names = list(results_all_detectors.keys()) + + # Overall PR curves comparing detectors + for cls in classes: + plt.figure(figsize=(10, 7)) + any_results_for_class = False # Track if any detector had results for this class + + for detector_name, results_by_class in results_all_detectors.items(): + if cls in results_by_class and results_by_class[cls]['num_pred'] > 0: # Check if there were predictions + res = results_by_class[cls] + precision = res['precision'] + recall = res['recall'] + ap = res['ap'] + best_threshold = res.get('best_threshold', 0.0) + + # Ensure plotting works even if precision/recall are empty arrays + if recall.size > 0 and precision.size > 0: + # Prepend a point for plotting nicely from recall=0 + plot_recall = np.concatenate(([0.], recall)) + # Use precision[0] if available, else 0. + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', + label=f'{detector_name} (AP={ap:.3f}, t={best_threshold:.2f})') + any_results_for_class = True + else: # Handle case where num_pred > 0 but P/R arrays somehow ended up empty + plt.plot([0], [0], marker='s', markersize=5, linestyle='', + label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') + any_results_for_class = True # Still mark as having results + + elif cls in results_by_class: # Class exists in evaluation, but no predictions were made for it + num_gt = results_by_class[cls]['num_gt'] + if num_gt > 0: + # Plot a marker indicating no predictions were made for this GT class + plt.plot([0], [0], marker='x', markersize=6, linestyle='', + label=f'{detector_name} (No Pred, GT={num_gt})') + + if any_results_for_class: + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'Precision-Recall Curve for Class: {cls}') + plt.legend(loc='lower left', fontsize='small') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + plot_path = os.path.join(pr_curves_dir, f'pr_curve_{cls}_overview.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated PR curve overview: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving PR curve plot for class '{cls}': {e}") + finally: + plt.close() # Close the figure regardless of save success + else: + # Check if there was any GT data for this class across all detectors + num_gt_total = sum(results_by_class.get(cls, {}).get('num_gt', 0) for results_by_class in results_all_detectors.values()) + if num_gt_total > 0: + print(f" Skipping PR plot for class '{cls}': No predictions found across detectors (GT={num_gt_total}).") + else: + print(f" Skipping PR plot for class '{cls}': No ground truth found.") + plt.close() # Close the empty figure + + # Plots showing PR curves at different thresholds for each detector and class + for detector_name, results_by_class in results_all_detectors.items(): + detector_dir = os.path.join(pr_curves_dir, detector_name) + if not os.path.exists(detector_dir): + try: + os.makedirs(detector_dir) + 
except OSError as e: + print(f"[LOG] Error creating directory for {detector_name}: {e}") + continue + + for cls in classes: + if cls not in results_by_class or results_by_class[cls]['num_pred'] == 0: + continue + + res = results_by_class[cls] + threshold_results = res.get('threshold_results', {}) + + if not threshold_results: + continue + + plt.figure(figsize=(10, 7)) + + # Plot PR curve for each threshold + thresholds = sorted(threshold_results.keys()) + for threshold in thresholds: + threshold_res = threshold_results[threshold] + + # Skip thresholds with no predictions + if 'precision' not in threshold_res or len(threshold_res['precision']) == 0: + continue + + precision = threshold_res['precision'] + recall = threshold_res['recall'] + ap = threshold_res['ap'] + + # Get TP/FP counts for label + cm = threshold_res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + + # Mark best threshold with a different line style + is_best = threshold == res.get('best_threshold', 0.0) + linestyle = '-' if is_best else '--' + linewidth = 2 if is_best else 1 + + # Prepend points for better visualization + plot_recall = np.concatenate(([0.], recall)) + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + + plt.plot(plot_recall, plot_precision, marker='.', markersize=3, + linestyle=linestyle, linewidth=linewidth, + label=f'thresh={threshold:.2f} (AP={ap:.3f}, TP={tp}, FP={fp})') + + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'{detector_name}: PR Curves at Different Thresholds for {cls}') + plt.legend(loc='lower left', fontsize='small') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + + plot_path = os.path.join(detector_dir, f'pr_curve_{cls}_thresholds.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated threshold PR curves for {detector_name}/{cls}: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving threshold PR curves for {detector_name}/{cls}: {e}") + finally: + plt.close() + +def export_confusion_matrix(results_all_detectors, output_dir, classes): + """ + Exports confusion matrix metrics for all detectors and classes to CSV files. 
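+
+    Rows of the summary CSV have the form
+        Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP
+    and the best-performing threshold for a class is marked with a leading
+    '* ' in the Threshold column.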
+ + Args: + results_all_detectors: Dictionary of results by detector and class + output_dir: Directory to save results + classes: List of classes to evaluate + """ + confusion_dir = os.path.join(output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Create a summary file for all detectors + summary_path = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_path, 'w', newline='') as f: + writer = csv.writer(f) + # Write header + writer.writerow(['Detector', 'Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + # Write data for each detector, class, and threshold + for detector_name, results_by_class in results_all_detectors.items(): + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + best_threshold = cls_results.get('best_threshold', 0.0) + threshold_results = cls_results.get('threshold_results', {}) + + for threshold, result in threshold_results.items(): + # Skip if no predictions at this threshold + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + tp = cm['TP'] + fp = cm['FP'] + fn = cm['FN'] + tn = cm['TN'] + + # Calculate precision and recall + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + # Write row + is_best = '* ' if threshold == best_threshold else '' + writer.writerow([ + detector_name, + cls, + f"{is_best}{threshold:.3f}", + tp, + fp, + fn, + tn, + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Confusion matrix metrics saved to: {summary_path}") + + # Create detailed files for each detector with per-threshold metrics + for detector_name, results_by_class in results_all_detectors.items(): + detector_file = os.path.join(confusion_dir, f'{detector_name}_detailed.csv') + with open(detector_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + threshold_results = cls_results.get('threshold_results', {}) + + # Sort thresholds for better readability + thresholds = sorted(threshold_results.keys()) + + for threshold in thresholds: + result = threshold_results[threshold] + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + precision = cm['TP'] / (cm['TP'] + cm['FP']) if (cm['TP'] + cm['FP']) > 0 else 0 + recall = cm['TP'] / (cm['TP'] + cm['FN']) if (cm['TP'] + cm['FN']) > 0 else 0 + + writer.writerow([ + cls, + f"{threshold:.3f}", + cm['TP'], + cm['FP'], + cm['FN'], + cm['TN'], + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Detailed metrics for {detector_name} saved to: {detector_file}") + +def main(): + parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') + parser.add_argument('gt_dir', type=str, help='Directory containing ground truth label files (KITTI format).') + parser.add_argument('pred_dirs', type=str, nargs='+', help='List of directories containing prediction files (KITTI format).') + parser.add_argument('output_dir', type=str, help='Directory to save evaluation results.') + parser.add_argument('--detector_names', type=str, nargs='*', help='Optional names for the detectors (corresponding to pred_dirs). 
If not provided, uses directory names.') + parser.add_argument('--iou_threshold', type=float, default=0.5, help='3D IoU threshold for matching (default: 0.5). KITTI examples: 0.5 (Car Easy/Mod), 0.7 (Car Hard), 0.5 (Ped/Cyc Easy/Mod), 0.5 (Ped/Cyc Hard).') + parser.add_argument('--classes', type=str, nargs='*', default=['Car', 'Pedestrian', 'Cyclist'], help='List of classes to evaluate (default: KITTI common classes). Case sensitive.') + parser.add_argument('--file_extension', type=str, default='.txt', help='Extension of the label files (default: .txt).') + parser.add_argument('--confidence_thresholds', type=float, nargs='*', default=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + help='Confidence thresholds for evaluation (default: 0.0-0.9 in 0.1 increments).') + + args = parser.parse_args() + + ######## Argument Validation and Setup + if not os.path.isdir(args.gt_dir): + print(f"[LOG] Error: Ground truth directory not found: {args.gt_dir}") + return + # Check prediction directories *before* loading GT + valid_pred_dirs = [] + original_detector_names = args.detector_names if args.detector_names else [os.path.basename(d.rstrip('/\\')) for d in args.pred_dirs] + final_detector_names = [] + pred_dir_map = {} # Map name back to directory path + + for i, d in enumerate(args.pred_dirs): + detector_name = original_detector_names[i] + if not os.path.isdir(d): + print(f"[LOG] Prediction directory not found: {d}. Skipping detector '{detector_name}'.") + else: + valid_pred_dirs.append(d) + final_detector_names.append(detector_name) + pred_dir_map[detector_name] = d + + if not valid_pred_dirs: + print("[LOG] Error: No valid prediction directories provided.") + return + + # Ensure output directory exists + try: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + print(f"[LOG] Created output directory: {args.output_dir}") + except OSError as e: + print(f"[LOG] Error creating output directory {args.output_dir}: {e}") + return + + print("[LOG] Configuration") + print(f"Ground Truth Dir: {args.gt_dir}") + print(f"Prediction Dirs: {valid_pred_dirs}") # Show only valid dirs being used + print(f"Detector Names: {final_detector_names}") # Show corresponding names + print(f"Output Dir: {args.output_dir}") + print(f"IoU Threshold: {args.iou_threshold}") + print(f"Classes: {args.classes}") + print(f"File Extension: {args.file_extension}") + print(f"Confidence Thresholds: {args.confidence_thresholds}") + print("=====================\n") + + ######## Load Data + print("[LOG] Loading data...") + try: + gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(args.file_extension)]) + if not gt_files: + print(f"[LOG] No ground truth files found in {args.gt_dir} with extension {args.file_extension}. 
Cannot evaluate.") + return + + print(f"Found {len(gt_files)} ground truth files.") + gt_boxes_all_samples = {} # {sample_id: {class: [boxes]}} + # Load GT data first + print("Loading Ground Truth...") + for filename in gt_files: + sample_id = os.path.splitext(filename)[0] + gt_file_path = os.path.join(args.gt_dir, filename) + gt_boxes_all_samples[sample_id] = load_labels(gt_file_path, is_gt=True) + print("Ground Truth loaded.") + + # Load prediction data for valid detectors + pred_boxes_all_detectors = {} # {detector_name: {sample_id: {class: [boxes]}}} + print("Loading Predictions...") + for detector_name in final_detector_names: + pred_dir = pred_dir_map[detector_name] # Get path from map + print(f" Loading from {detector_name} ({pred_dir})...") + pred_boxes_all_detectors[detector_name] = {} + files_found_count = 0 + for filename in gt_files: # Iterate based on GT files for consistency + sample_id = os.path.splitext(filename)[0] + pred_file_path = os.path.join(pred_dir, filename) + # load_labels handles non-existent files, returns empty dict + pred_boxes_all_detectors[detector_name][sample_id] = load_labels(pred_file_path, is_gt=False) + if os.path.exists(pred_file_path): + files_found_count += 1 + print(f" Found {files_found_count} matching prediction files for {detector_name}.") + print("Predictions loaded.") + + print(f"\n[LOG] Loaded data for {len(gt_files)} samples and {len(final_detector_names)} detector(s).\n") + + except Exception as e: + print(f"[LOG] Error during data loading: {e}") + traceback.print_exc() + return + + ######## Evaluation + print("[LOG] Evaluating detectors...") + results_all_detectors = {} # {detector_name: {class: {'ap':, 'p':, 'r':, 'gt':, 'pred':}}} + try: + for detector_name in final_detector_names: # Use the filtered list of names + print(f" Evaluating: {detector_name}") + # Check if prediction data was actually loaded for this detector + if detector_name not in pred_boxes_all_detectors: + print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") + results_all_detectors[detector_name] = {} # Store empty results + continue + + pred_boxes_all_samples = pred_boxes_all_detectors[detector_name] + + if not gt_boxes_all_samples: # Redundant check, but safe + print(f" Skipping {detector_name} - No ground truth data loaded.") + results_all_detectors[detector_name] = {} + continue + + # Perform evaluation + results_by_class = evaluate_detector( + gt_boxes_all_samples, + pred_boxes_all_samples, + args.classes, # Pass the user-specified classes + args.iou_threshold, + args.confidence_thresholds + ) + results_all_detectors[detector_name] = results_by_class + print("[LOG] Evaluation complete.\n") + except Exception as e: + print(f"[LOG] Error during evaluation: {e}") + traceback.print_exc() + return # Stop execution on evaluation error + + + ######## Report Results & Save + print("[LOG] Results") + results_file_path = os.path.join(args.output_dir, 'evaluation_metrics.txt') + + try: + with open(results_file_path, 'w') as f: + f.write(f"Evaluation Results (IoU Threshold: {args.iou_threshold})\n") + f.write(f"Evaluated Classes: {', '.join(args.classes)}\n") + f.write(f"Confidence Thresholds: {args.confidence_thresholds}\n") + f.write("="*60 + "\n") + + overall_mAPs = {} # Store mAP for each detector {detector_name: mAP_value} + + # Process results for each detector that was evaluated + for detector_name in final_detector_names: + # Check if results exist for this detector (might be skipped if loading failed) + if detector_name not in 
results_all_detectors: + print(f"\nDetector: {detector_name} (SKIPPED - No results)") + f.write(f"\nDetector: {detector_name} (SKIPPED - No results)\n") + continue # Skip to the next detector + + results_by_class = results_all_detectors[detector_name] + print(f"\nDetector: {detector_name}") + f.write(f"\nDetector: {detector_name}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") + + detector_aps = [] # AP values for classes with GT > 0 for this detector + evaluated_classes_with_gt = [] # Class names with GT > 0 for this detector + + # Iterate through the classes specified by the user for consistent reporting + for cls in args.classes: + # Check if GT data exists for this class across all samples + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + if cls in results_by_class: + # Results exist for this class and detector + res = results_by_class[cls] + ap = res['ap'] + num_gt = res['num_gt'] # Should match num_gt_total_for_class if evaluated correctly + num_pred = res['num_pred'] # Number of valid predictions for this class + best_threshold = res.get('best_threshold', 0.0) + + # Get confusion matrix from best threshold + cm = res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}") + f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}\n") + + # Include in mAP calculation only if there were GT boxes for this class + if num_gt > 0: + detector_aps.append(ap) + evaluated_classes_with_gt.append(cls) + # If num_gt is 0 for a class, its AP is 0 or NaN, not included in mAP. + else: + # No results entry for this class, implies 0 valid predictions processed. + # Report AP as 0.0000 if GT existed, otherwise N/A. + print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}") + f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}\n") + # If GT existed, this class contributes 0 to the mAP average. 
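+                        # (A class that has ground truth but received no
+                        # predictions therefore drags the mean down instead of
+                        # being silently ignored.)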
+ if num_gt_total_for_class > 0: + detector_aps.append(0.0) + evaluated_classes_with_gt.append(cls) + + # Calculate and report mAP for this detector based on evaluated classes with GT + if detector_aps: # Check if list is not empty + mAP = np.mean(detector_aps) + overall_mAPs[detector_name] = mAP + # Report which classes contributed to the mAP + mAP_info = f"(Classes w/ GT: {', '.join(evaluated_classes_with_gt)})" if evaluated_classes_with_gt else "" + print("-" * 85) + print(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}") + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}\n") + + # Add confusion matrix section for this detector + f.write("\nConfusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + + for cls in evaluated_classes_with_gt: + if cls in results_by_class: + res = results_by_class[cls] + best_thresh = res.get('best_threshold', 0.0) + cm = res.get('confusion_matrix', {}) + + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + f.write(f"{cls:<15} | {best_thresh:<10.3f} | {tp:<5} | {fp:<5} | {fn:<5} | {precision:<10.4f} | {recall:<10.4f}\n") + + else: + overall_mAPs[detector_name] = np.nan # Indicate mAP couldn't be computed + print("-" * 85) + print(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}") + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}\n") + + + ######## Overall Comparison Table + # Check if there are results to compare + if len(final_detector_names) > 0 and results_all_detectors: + print("\n[LOG] Overall Class AP Comparison") + f.write("\n" + "="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + # Truncate long detector names for table header + header_names = [name[:12] for name in final_detector_names] # Limit name length + header = f"{'Class':<15}" + "".join([f" | {name:<12}" for name in header_names]) + print(header) + f.write(header + "\n") + print("-" * len(header)) + f.write("-" * len(header) + "\n") + + # Iterate through user-specified classes for the table rows + for cls in args.classes: + aps_str = "" + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + # Get AP for each detector for this class + for detector_name in final_detector_names: + # Retrieve AP using .get chains safely + ap_val = results_all_detectors.get(detector_name, {}).get(cls, {}).get('ap', np.nan) + + # Decide display value based on AP and total GT count + if np.isnan(ap_val): + # Show 0.0000 if GT existed, else N/A + display_val = f"{0.0:<12.4f}" if num_gt_total_for_class > 0 else f"{'N/A':<12}" + else: + # Format valid AP value + display_val = f"{ap_val:<12.4f}" + aps_str += f" | {display_val}" + + line = f"{cls:<15}" + aps_str + print(line) + f.write(line + "\n") + + ######## Overall mAP Comparison + print("-" * len(header)) + f.write("-" * len(header) + "\n") + map_str = "" + for detector_name in final_detector_names: + map_val = overall_mAPs.get(detector_name, np.nan) # Get calculated mAP + map_str += f" | {map_val:<12.4f}" if not np.isnan(map_val) else f" | {'N/A':<12}" + map_line = f"{'mAP':<15}" + map_str + print(map_line) + f.write(map_line + "\n") + else: + print("\n[LOG] Skipping overall comparison table - no results available.") + f.write("\n[LOG] 
Skipping overall comparison table - no results available.\n") + + print(f"\nMetrics saved to: {results_file_path}") + + except Exception as e: + print(f"[LOG] Error during results reporting/saving: {e}") + traceback.print_exc() + # Plot + # return # Or stop + + ########## Export Confusion Matrix CSV + try: + export_confusion_matrix(results_all_detectors, args.output_dir, args.classes) + except Exception as e: + print(f"[LOG] Error exporting confusion matrix: {e}") + traceback.print_exc() + + ########## Plotting + print("\n[LOG] Generating PR curves...") + try: + # Check if results_all_detectors has data before plotting + if results_all_detectors and any(results_all_detectors.values()): + # Pass only valid detector results to plotting function + # Filter out detectors that were skipped or had errors resulting in no results dict + valid_results = {k: v for k, v in results_all_detectors.items() if isinstance(v, dict) and k in final_detector_names} + if valid_results: + plot_pr_curves(valid_results, args.classes, args.output_dir) + print(f"[LOG] PR curve plots potentially saved in: {args.output_dir}") + else: + print("[LOG] Skipping plotting - no valid evaluation results were generated.") + else: + print("[LOG] Skipping plotting - no evaluation results generated.") + + except Exception as e: + print(f"Error during plotting: {e}") + traceback.print_exc() + + print("\n[LOG] Evaluation finished.") + + +if __name__ == '__main__': + main() diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt new file mode 100644 index 000000000..6b9ce3138 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.0.2) +project(pointpillars_ros) + +## Find required ROS packages +find_package(catkin REQUIRED COMPONENTS + cv_bridge + geometry_msgs + message_filters + ros_numpy + rospy + sensor_msgs + std_msgs + tf2_ros + visualization_msgs +) + +## Declare Python scripts installation +catkin_install_python(PROGRAMS + scripts/pointpillars_ros_node.py + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +## Declare launch files installation +install(DIRECTORY launch/ + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch +) diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch new file mode 100644 index 000000000..022ff77cb --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml new file mode 100644 index 000000000..06f2727ad --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml @@ -0,0 +1,21 @@ + + + pointpillars_ros + 0.1.0 + ROS wrapper for PointPillars 3D object detection. 
+  <maintainer email="todo@todo.todo">Your Name</maintainer>
+  <license>Apache-2.0</license>
+  <buildtool_depend>catkin</buildtool_depend>
+
+  <depend>rospy</depend>
+  <depend>std_msgs</depend>
+  <depend>sensor_msgs</depend>
+  <depend>geometry_msgs</depend>
+  <depend>visualization_msgs</depend>
+  <depend>message_filters</depend>
+  <depend>tf2_ros</depend>
+  <depend>cv_bridge</depend>
+  <depend>ros_numpy</depend>
+
+  <export>
+  </export>
+</package>
diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py
new file mode 100644
index 000000000..c8d927899
--- /dev/null
+++ b/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+
+## To run:
+# Create Package: catkin_create_pkg pointpillars_ros rospy std_msgs sensor_msgs geometry_msgs visualization_msgs message_filters tf2_ros cv_bridge ros_numpy
+# In launch/pointpillars_test.launch: Update checkpoint_path and config_path. Set target_frame.
+# chmod +x ~/pointpillars_ros/scripts/pointpillars_ros_node.py
+# catkin_make
+# source ~/catkin_ws/devel/setup.bash
+# export ROS_PACKAGE_PATH=~Gem**/pointpillars_ros/../:$ROS_PACKAGE_PATH
+# roslaunch pointpillars_ros pointpillars_test.launch (Make sure roscore and your data source (e.g., rosbag play) are running).
+# Set camera_K and camera_D via the launch-file parameters (the CameraInfo subscription was removed), and provide a working TF tree with the transform between the LiDAR frame and the camera frame (self.target_frame).
+
+import rospy
+import ros_numpy
+import numpy as np
+import torch
+import cv2
+import os
+import sys
+import time
+
+###### ROS Message Imports
+from sensor_msgs.msg import PointCloud2, Image # Removed CameraInfo
+from visualization_msgs.msg import Marker, MarkerArray
+from geometry_msgs.msg import Point
+
+###### Synchronization and TF Imports
+import message_filters
+import tf2_ros
+import tf2_geometry_msgs
+import tf.transformations  # bound explicitly: quaternion_matrix is referenced via tf.transformations below
+from tf.transformations import quaternion_from_euler
+
+###### Image Processing Import
+from cv_bridge import CvBridge, CvBridgeError
+
+# '~' must be expanded, otherwise the os.path.isdir check below always fails
+pointpillars_repo_path = os.path.expanduser('~/PointPillars') # <<< CHANGE THIS >>>
+
+if not os.path.isdir(pointpillars_repo_path):
+    try: import rospy; rospy.logfatal(f"PointPillars repository path not found: {pointpillars_repo_path}")
+    except ImportError: print(f"[FATAL] PointPillars repository path not found: {pointpillars_repo_path}")
+    sys.exit(1)
+sys.path.insert(0, pointpillars_repo_path)
+
+###### Import necessary PointPillars components
+try:
+    from pointpillars.model import PointPillars
+    from pointpillars.utils import setup_seed
+except ImportError as e:
+    try: import rospy; rospy.logfatal(f"Failed to import PointPillars modules from {pointpillars_repo_path}. Error: {e}")
+    except ImportError: print(f"[FATAL] Failed to import PointPillars modules from {pointpillars_repo_path}. Error: {e}")
+    sys.exit(1)
+
+
+class PointPillarsROS:
+    def __init__(self):
+        rospy.init_node('pointpillars_ros_node')
+
+        ###### Load essential parameters
+        self.checkpoint_path = rospy.get_param('~checkpoint_path')
+        self.config_path = rospy.get_param('~config_path') # Still potentially needed for class names etc.
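+        # The private parameters above and below come from the launch file; a
+        # hypothetical minimal snippet (placeholder values, not a real
+        # calibration) could look like:
+        #   <param name="checkpoint_path" value="/path/to/epoch_160.pth"/>
+        #   <param name="target_frame" value="camera_frame"/>
+        #   <rosparam param="camera_K">[[600.0, 0.0, 320.0], [0.0, 600.0, 240.0], [0.0, 0.0, 1.0]]</rosparam>
+        #   <rosparam param="camera_D">[0.0, 0.0, 0.0, 0.0, 0.0]</rosparam>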
+ lidar_topic = rospy.get_param('~lidar_topic', '/ouster/points') + image_topic = rospy.get_param('~image_topic', '/oak/rgb/image_raw') + # Removed camera_info_topic parameter + self.score_threshold = rospy.get_param('~score_threshold', 0.3) + self.target_frame = rospy.get_param('~target_frame') # Must match child frame in TF publisher + + ###### Load Calibration K and D from Parameters + try: + k_list = rospy.get_param('~camera_K') + d_list = rospy.get_param('~camera_D') + + # Validate and convert K + if not isinstance(k_list, list) or len(k_list) != 3 or \ + not all(isinstance(row, list) and len(row) == 3 for row in k_list): + raise ValueError("Parameter '~camera_K' must be a 3x3 list of lists.") + self.camera_matrix = np.array(k_list, dtype=np.float64) # Use float64 for cv2 + + # Validate and convert D + if not isinstance(d_list, list): + raise ValueError("Parameter '~camera_D' must be a list.") + self.dist_coeffs = np.array(d_list, dtype=np.float64) # Use float64 for cv2 + + rospy.loginfo("Camera K and D matrices loaded from parameters.") + rospy.logdebug(f"K = {self.camera_matrix.tolist()}") + rospy.logdebug(f"D = {self.dist_coeffs.tolist()}") + + except Exception as e: + rospy.logfatal(f"Failed to load camera calibration K/D from parameters: {e}") + rospy.logfatal("Please ensure '~camera_K' (3x3 list) and '~camera_D' (list) parameters are set correctly in the launch file.") + sys.exit(1) + + ###### Load PointPillars configuration (might still need for class names etc.) + try: + # If config isn't needed by model(), this might be simplified further + # depending on how class names are handled for visualization + from pointpillars.utils import load_config # Import here if needed + self.cfg = load_config(self.config_path) + rospy.loginfo(f"Loaded model config from {self.config_path}") + except FileNotFoundError: + # Config might be optional if class names handled differently + rospy.logwarn(f"Model config file not found: {self.config_path}. Class names might not be available.") + self.cfg = {} # Use empty dict if config optional/missing + except Exception as e: + rospy.logwarn(f"Error loading config {self.config_path}: {e}. 
Class names might not be available.") + self.cfg = {} + + ###### Setup Seed + setup_seed(self.cfg.get('seed', 0)) # Use seed from config or default + + ###### Define Point Cloud Ranges (from test.py) + self.pcd_limit_range = np.array([0, -39.68, -3, 69.12, 39.68, 1], dtype=np.float32) + self.pcd_post_limit_range = np.array([0, -40, -3, 70.4, 40, 0.0], dtype=np.float32) + + ###### Determine number of classes + n_classes = self.cfg.get('nclasses', 3) # Get from config or default to 3 + rospy.loginfo(f"Initializing PointPillars model for {n_classes} classes.") + + ###### Setup device and load model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = PointPillars(nclasses=n_classes).to(self.device) + + if not os.path.exists(self.checkpoint_path): + rospy.logfatal(f"Checkpoint file not found: {self.checkpoint_path}") + sys.exit(1) + try: + map_location = self.device if not torch.cuda.is_available() else None + checkpoint = torch.load(self.checkpoint_path, map_location=map_location) + state_dict = checkpoint.get('model_state_dict', checkpoint) + self.model.load_state_dict(state_dict) + self.model.eval() + rospy.loginfo(f"PointPillars model loaded successfully from {self.checkpoint_path} on device {self.device}.") + except Exception as e: + rospy.logfatal(f"Error loading model checkpoint: {e}", exc_info=True) + sys.exit(1) + + ###### Initialize CV Bridge + self.cv_bridge = CvBridge() + + ###### TF Listener Setup + self.tf_buffer = tf2_ros.Buffer() + self.tf_listener = tf2_ros.TransformListener(self.tf_buffer) + + ###### Publisher Setup + self.image_pub = rospy.Publisher("~output/image_annotated", Image, queue_size=2) + self.marker_pub = rospy.Publisher("~output/bounding_boxes", MarkerArray, queue_size=5) + + ###### Subscriber and Synchronizer Setup + lidar_sub = message_filters.Subscriber(lidar_topic, PointCloud2) + image_sub = message_filters.Subscriber(image_topic, Image) + self.sync = message_filters.ApproximateTimeSynchronizer( + [lidar_sub, image_sub], queue_size=15, slop=0.1) + self.sync.registerCallback(self.callback) + + rospy.loginfo("PointPillars ROS node initialized and ready.") + + + def point_range_filter(self, pts, point_range): + ###### Filter points outside the specified range + keep_mask = (pts[:, 0] > point_range[0]) & \ + (pts[:, 1] > point_range[1]) & \ + (pts[:, 2] > point_range[2]) & \ + (pts[:, 0] < point_range[3]) & \ + (pts[:, 1] < point_range[4]) & \ + (pts[:, 2] < point_range[5]) + return pts[keep_mask] + + # Removed process_camera_info callback + + def get_transform(self, target_frame, source_frame, timestamp): + ###### Get TF transform + try: + return self.tf_buffer.lookup_transform(target_frame, source_frame, timestamp, rospy.Duration(0.1)) + except Exception as e: + rospy.logwarn_throttle(5.0, f"TF lookup failed from {source_frame} to {target_frame}: {e}") + return None + + def callback(self, lidar_msg, image_msg): + start_time = time.time() + + ###### Convert Lidar + try: + pc_data = ros_numpy.point_cloud2.pointcloud2_to_array(lidar_msg) + if 'intensity' in pc_data.dtype.names: + points = np.vstack([pc_data['x'], pc_data['y'], pc_data['z'], pc_data['intensity']]).T + else: + rospy.logwarn_once("Lidar 'intensity' field not found. 
Using 0.") + points = np.vstack([pc_data['x'], pc_data['y'], pc_data['z'], np.zeros(len(pc_data))]).T + points = points.astype(np.float32) + except Exception as e: + rospy.logerr(f"Error converting PointCloud2: {e}") + return + + ###### Filter points by range + points_filtered = self.point_range_filter(points, self.pcd_limit_range) + if points_filtered.shape[0] == 0: + rospy.logdebug("No points left after range filtering.") + if self.marker_pub.get_num_connections() > 0: self.clear_markers(lidar_msg.header) + if self.image_pub.get_num_connections() > 0: self.image_pub.publish(image_msg) + return + + ###### Convert to Tensor + pc_torch = torch.from_numpy(points_filtered).to(self.device) + batched_pts = [pc_torch] + + ###### Run Inference + result_dict = {} + with torch.no_grad(): + try: + results = self.model(batched_pts=batched_pts, mode='test') + if isinstance(results, list) and len(results) > 0: + result_dict = results[0] + else: + rospy.logwarn(f"Unexpected model output format: {type(results)}. Expected list.") + except Exception as e: + rospy.logerr(f"Error during model inference: {e}", exc_info=True) + return # Exit callback on inference error + + ###### Extract Results + det_boxes_lidar = result_dict.get('lidar_bboxes', np.array([])) + det_labels = result_dict.get('labels', np.array([])) + det_scores = result_dict.get('scores', np.array([])) + + ###### Apply Score Thresholding & Lidar Range Filtering + if det_boxes_lidar.shape[0] > 0: + score_mask = det_scores >= self.score_threshold + det_boxes_lidar = det_boxes_lidar[score_mask] + det_labels = det_labels[score_mask] + det_scores = det_scores[score_mask] + + if det_boxes_lidar.shape[0] > 0: + center_x = det_boxes_lidar[:, 0]; center_y = det_boxes_lidar[:, 1]; center_z = det_boxes_lidar[:, 2] + range_mask = (center_x >= self.pcd_post_limit_range[0]) & (center_y >= self.pcd_post_limit_range[1]) & \ + (center_z >= self.pcd_post_limit_range[2]) & (center_x < self.pcd_post_limit_range[3]) & \ + (center_y < self.pcd_post_limit_range[4]) & (center_z < self.pcd_post_limit_range[5]) + det_boxes_lidar = det_boxes_lidar[range_mask] + det_labels = det_labels[range_mask] + det_scores = det_scores[range_mask] + + ###### Get Transform for Visualization + # target_frame now comes directly from param, MUST match static transform publisher child frame + transform_lidar_to_target = self.get_transform(self.target_frame, lidar_msg.header.frame_id, lidar_msg.header.stamp) + + ###### Visualization + num_detections = len(det_boxes_lidar) + cv_image = None + try: + cv_image = self.cv_bridge.imgmsg_to_cv2(image_msg, desired_encoding="bgr8") + except CvBridgeError as e: + rospy.logerr(f"Error converting Image: {e}") + + if num_detections > 0: + rospy.logdebug(f"Detected {num_detections} objects after filtering.") + if self.marker_pub.get_num_connections() > 0: + self.publish_markers(det_boxes_lidar, lidar_msg.header, det_labels, det_scores) + + # Use K, D loaded from params for projection + if self.image_pub.get_num_connections() > 0 and cv_image is not None: + if self.camera_matrix is not None and self.dist_coeffs is not None and transform_lidar_to_target is not None: + annotated_image = self.project_and_draw_boxes_on_image( + cv_image.copy(), det_boxes_lidar, transform_lidar_to_target, + self.camera_matrix, self.dist_coeffs, det_labels, det_scores) # Pass K, D from params + try: + img_msg_out = self.cv_bridge.cv2_to_imgmsg(annotated_image, encoding="bgr8") + img_msg_out.header = image_msg.header + self.image_pub.publish(img_msg_out) + except CvBridgeError 
as e: + rospy.logerr(f"Error converting annotated image to message: {e}") + else: + rospy.logwarn_throttle(5.0, "Cannot annotate image: Missing TF transform (K/D loaded from params).") + if cv_image is not None: self.image_pub.publish(image_msg) # Publish original + else: + rospy.logdebug("No detections passed filters.") + if self.marker_pub.get_num_connections() > 0: self.clear_markers(lidar_msg.header) + if self.image_pub.get_num_connections() > 0 and cv_image is not None: self.image_pub.publish(image_msg) # Publish original + + processing_time = time.time() - start_time + rospy.logdebug(f"Processed frame in {processing_time:.4f}s ({(1.0/processing_time if processing_time > 0 else 0):.1f} Hz)") + + def project_and_draw_boxes_on_image(self, image, boxes_lidar, transform, K, D, labels=None, scores=None): + ###### Project 3D boxes to image plane and draw using OpenCV + if K is None or D is None or transform is None: return image + ###### Convert transform to 4x4 matrix + trans = transform.transform.translation; quat = transform.transform.rotation + tf_matrix = tf.transformations.quaternion_matrix([quat.x, quat.y, quat.z, quat.w]) + tf_matrix[0, 3] = trans.x; tf_matrix[1, 3] = trans.y; tf_matrix[2, 3] = trans.z + ###### Box corners (unit cube) and edges + corners_norm = np.array([[ .5, .5, .5],[ .5, .5,-.5],[ .5,-.5,-.5],[ .5,-.5, .5], + [-.5, .5, .5],[-.5, .5,-.5],[-.5,-.5,-.5],[-.5,-.5, .5]]) + edges = [(0,1),(1,2),(2,3),(3,0),(4,5),(5,6),(6,7),(7,4),(0,4),(1,5),(2,6),(3,7)] + + for i, box in enumerate(boxes_lidar): + x, y, z, dx, dy, dz, yaw = box + if dx<=0 or dy<=0 or dz<=0: continue + ###### Calculate 8 corners in lidar frame + Rz = np.array([[np.cos(yaw),-np.sin(yaw),0],[np.sin(yaw),np.cos(yaw),0],[0,0,1]]) + corners = corners_norm * np.array([dx, dy, dz]); corners = corners @ Rz.T; corners += np.array([x, y, z]) + ###### Transform corners to camera frame + corners_lidar_h = np.hstack((corners, np.ones((8,1)))) + corners_cam_h = (tf_matrix @ corners_lidar_h.T).T; corners_cam = corners_cam_h[:,:3] + ###### Keep only points in front of the camera + front_mask = corners_cam[:, 2] > 0.1; + if not np.all(front_mask): continue + ###### Project to image plane using cv2.projectPoints + try: image_points, _ = cv2.projectPoints(corners_cam, np.zeros(3), np.zeros(3), K, D); image_points = image_points.reshape(-1, 2).astype(int) + except: continue # Ignore projection errors + ###### Draw edges + color = (0, 255, 0); thickness = 1 + for start_idx, end_idx in edges: cv2.line(image, tuple(image_points[start_idx]), tuple(image_points[end_idx]), color, thickness) + ###### Add label/score text (optional) + if labels is not None and scores is not None and i < len(labels) and i < len(scores): + try: + label_name = self.cfg.get('class_names', {}).get(int(labels[i]), f"L:{int(labels[i])}") # Use cfg if available, else basic + score = scores[i]; text = f"{label_name}:{score:.2f}" + text_pos = tuple(((image_points[0] + image_points[4]) // 2) + np.array([0,-5])) # Pos above top front center + cv2.putText(image, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, cv2.LINE_AA) + except: pass # Ignore text errors + return image + + def clear_markers(self, header): + marker_array = MarkerArray(); marker = Marker(); marker.header = header + marker.ns = "pointpillars_boxes"; marker.id = 0; marker.action = Marker.DELETEALL + marker_array.markers.append(marker); self.marker_pub.publish(marker_array) + + def publish_markers(self, boxes_lidar, header, labels=None, scores=None): + marker_array = 
MarkerArray(); marker_id = 0; default_lifetime = rospy.Duration(0.2) + for i, box in enumerate(boxes_lidar): + x,y,z,dx,dy,dz,yaw = box; label_id = int(labels[i]) if labels is not None else 0; score = scores[i] if scores is not None else 1.0 + marker = Marker(); marker.header = header; marker.ns = "pointpillars_boxes"; marker.id = marker_id; marker.type = Marker.CUBE; marker.action = Marker.ADD; marker.lifetime = default_lifetime + marker.pose.position.x=x; marker.pose.position.y=y; marker.pose.position.z=z + q = quaternion_from_euler(0,0,yaw); marker.pose.orientation.x=q[0]; marker.pose.orientation.y=q[1]; marker.pose.orientation.z=q[2]; marker.pose.orientation.w=q[3] + marker.scale.x=max(dx,0.01); marker.scale.y=max(dy,0.01); marker.scale.z=max(dz,0.01); marker.color.r=0.0; marker.color.g=1.0; marker.color.b=0.0; marker.color.a=0.4 + marker_array.markers.append(marker); marker_id += 1 + text_marker = Marker(); text_marker.header = header; text_marker.ns = "pointpillars_labels"; text_marker.id = marker_id; text_marker.type = Marker.TEXT_VIEW_FACING; text_marker.action = Marker.ADD; text_marker.lifetime = default_lifetime + text_marker.pose.position.x=x; text_marker.pose.position.y=y; text_marker.pose.position.z = z + dz/2.0 + 0.3; text_marker.scale.z = 0.4; text_marker.color.r=1.0; text_marker.color.g=1.0; text_marker.color.b=1.0; text_marker.color.a=0.9 + label_name = self.cfg.get('class_names', {}).get(label_id, f"L:{label_id}") # Use cfg if available + text_marker.text = f"{label_name} ({score:.2f})" + marker_array.markers.append(text_marker); marker_id += 1 + if marker_array.markers: self.marker_pub.publish(marker_array) + +if __name__ == '__main__': + try: node = PointPillarsROS(); rospy.spin() + except rospy.ROSInterruptException: rospy.loginfo("PointPillars ROS node shutting down.") + except Exception as e: rospy.logfatal(f"Unhandled exception: {e}", exc_info=True) + finally: + if 'node' in locals() and hasattr(node, 'device') and node.device == torch.device("cuda"): + try: import torch.cuda; torch.cuda.empty_cache(); rospy.loginfo("Cleared CUDA cache.") + except: pass + rospy.loginfo("PointPillars ROS node finished.") diff --git a/GEMstack/onboard/perception/sensorFusion/rosbag_processor.py b/GEMstack/onboard/perception/sensorFusion/rosbag_processor.py new file mode 100644 index 000000000..4742e2b49 --- /dev/null +++ b/GEMstack/onboard/perception/sensorFusion/rosbag_processor.py @@ -0,0 +1,103 @@ +import pypcd +import rospy +import message_filters +from sensor_msgs.msg import Image, PointCloud2 +import cv2 +# import os +from cv_bridge import CvBridge + +import numpy as np +np.float = np.float64 # temp fix for the following import +import ros_numpy + +from pathlib import Path + + +class RosbagProcessor(): + # Pairs up and stores Image and PointCloud2 messages into their respective test (20%) and training (80%) png and bin files. 
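+    # The resulting KITTI-style layout under DATA_DIR is, e.g.:
+    #   training/image_2/000000.png and training/velodyne/000000.bin
+    #   testing/image_2/000000.png  and testing/velodyne/000000.bin
+    # (every fifth synchronized pair is routed to testing/)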
+    def __init__(self, DATA_DIR: Path, train_counter: int = 0, test_counter: int = 0) -> None:
+        self.__img_folder = 'image_2'
+        self.__lidar_folder = 'velodyne'
+
+        self.__TEST_DIR = DATA_DIR / 'testing'
+        self.__TRAIN_DIR = DATA_DIR / 'training'
+        TEST_IMG_DIR = self.__TEST_DIR / self.__img_folder
+        TEST_BIN_DIR = self.__TEST_DIR / self.__lidar_folder
+        TRAIN_IMG_DIR = self.__TRAIN_DIR / self.__img_folder
+        TRAIN_BIN_DIR = self.__TRAIN_DIR / self.__lidar_folder
+
+        # Create directories if they don't exist
+        TEST_IMG_DIR.mkdir(parents=True, exist_ok=True)
+        TEST_BIN_DIR.mkdir(parents=True, exist_ok=True)
+        TRAIN_IMG_DIR.mkdir(parents=True, exist_ok=True)
+        TRAIN_BIN_DIR.mkdir(parents=True, exist_ok=True)
+
+        self.__train_counter = train_counter
+        self.__test_counter = test_counter
+        self.__bridge = CvBridge()
+
+        # Subscribers and synchronizers
+        rospy.init_node('bagListener', anonymous=True)
+        self.rgb_rosbag = message_filters.Subscriber('/oak/rgb/image_raw', Image)
+        self.top_lidar_rosbag = message_filters.Subscriber('/ouster/points', PointCloud2)
+        self.sync = message_filters.ApproximateTimeSynchronizer([self.rgb_rosbag, self.top_lidar_rosbag], queue_size=10, slop=0.1)
+        self.sync.registerCallback(self.pair_data_callback)
+
+        rospy.spin()
+
+    def pair_data_callback(self, rgb_image_msg: Image, lidar_pc2_msg: PointCloud2):
+        # Divide up 20% of the data for testing:
+        if (self.__train_counter + self.__test_counter) % 5 == 0:
+            PARENT_FOLDER = self.__TEST_DIR
+            idx = self.__test_counter
+        else:
+            PARENT_FOLDER = self.__TRAIN_DIR
+            idx = self.__train_counter
+
+        # Store point cloud and image data with KITTI formatted filenames:
+        image_filename = PARENT_FOLDER / self.__img_folder / f"{idx:06d}.png"
+        pc_filename = PARENT_FOLDER / self.__lidar_folder / f"{idx:06d}.bin"
+        # image_filename = self.get_path(image_filename)
+        # pc_filename = self.get_path(pc_filename)
+
+        # Read and write the received image into a KITTI specified png file
+        # (cv2.imwrite expects a plain string path, not a pathlib.Path)
+        cv_image = self.__bridge.imgmsg_to_cv2(rgb_image_msg, desired_encoding='bgr8')
+        cv2.imwrite(str(image_filename), cv_image)
+
+        # Read and write the lidar data into a KITTI specified bin file:
+        # Convert to numpy array and flatten it
+        pc_array = ros_numpy.point_cloud2.pointcloud2_to_array(lidar_pc2_msg)
+        pc_array = pc_array.reshape(-1)
+
+        # Create (N, 4) array and fill it
+        points = np.zeros((pc_array.shape[0], 4), dtype=np.float32)
+        points[:, 0] = pc_array['x']
+        points[:, 1] = pc_array['y']
+        points[:, 2] = pc_array['z']
+
+        # Check for intensity just in case:
+        if 'intensity' in pc_array.dtype.names:
+            points[:, 3] = pc_array['intensity']
+        else:
+            rospy.loginfo("Intensity was set to zero")
+            points[:, 3] = 0.0
+
+        # Save as binary file
+        with open(pc_filename, 'wb') as f:
+            f.write(points.tobytes())
+
+        if (self.__train_counter + self.__test_counter) % 5 == 0:
+            rospy.loginfo(f"Saved test image and point cloud #{self.__test_counter}")
+            self.__test_counter += 1
+        else:
+            rospy.loginfo(f"Saved train image and point cloud #{self.__train_counter}")
+            self.__train_counter += 1
+
+
+if __name__ == '__main__':
+    BASE_DIR = Path(__file__).resolve().parent
+
+    # Path to the destination folder (relative from this script)
+    DATA_DIR = BASE_DIR.parent.parent.parent.parent / 'data' / 'sensorFusionData'
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    bag_processor = RosbagProcessor(DATA_DIR=DATA_DIR)
diff --git a/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py
diff --git a/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py b/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py
new file mode 100644
index 000000000..c20e1d392
--- /dev/null
+++ b/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py
@@ -0,0 +1,321 @@
+import os
+import sys
+import numpy as np
+import traceback
+import shutil
+
+
+print("""
+#########################################################
+# 3D Bounding Box Evaluation Tool
+#########################################################
+
+This tool evaluates the performance of multiple 3D object detection models
+by calculating metrics including:
+
+1. Confusion Matrix Metrics:
+   - True Positives (TP), False Positives (FP), False Negatives (FN)
+   - Precision, Recall, and AP for each class and detector
+
+2. Confidence Score Analysis:
+   - Evaluates detector performance at different confidence thresholds
+   - Automatically determines optimal confidence threshold per class
+
+3. Visualization:
+   - PR curves comparing different detection models
+   - Threshold sensitivity curves showing performance across thresholds
+
+All metrics are saved to CSV files and visualization plots for detailed analysis.
+#########################################################
+""")
+
+eval_script_filename = "eval_3d_model_performance.py"
+eval_module_name = "eval_3d_model_performance"
+try:
+    if not os.path.exists(eval_script_filename):
+        raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.")
+
+    # Attempt the standard import
+    imported_module = __import__(eval_module_name)
+    print(f"Module '{eval_module_name}' imported successfully.")
+
+except Exception as e:
+    # Report the actual failure rather than swallowing it with a bare except
+    print("##################")
+    print(f"Failed to import {eval_script_filename}: {e}")
+    traceback.print_exc()
+    print("##################")
+    exit(1)
+
+####### Setup paths for data directories
+
+# Root directory with data files
+project_dir = "./data"
+
+# Ground truth directory (KITTI format label files)
+gt_dir = os.path.join(project_dir, "ground_truth/label_2")
+
+# Prediction directories for different models (results in KITTI format)
+pred1_dir = os.path.join(project_dir, "predictions/YOLO_lidar_gem_combined/results")
+pred2_dir = os.path.join(project_dir, "predictions/pointpillars/results")
+pred3_dir = os.path.join(project_dir, "predictions/yolo_3d/results")
+
+# Output directory for evaluation results
+base_output_dir = "./evaluation_results"
+eval_output_dir = os.path.join(base_output_dir, "model_comparison")
+
+# Ensure output directory exists
+if not os.path.exists(eval_output_dir):
+    os.makedirs(eval_output_dir)
+
+# Information about data locations
+print("\n[LOG] Using data files from the following locations:")
+print(f"Ground truth directory: {gt_dir}")
+print("Prediction directories:")
+print(f"  - {pred1_dir} (YOLO_lidar_gem_combined)")
+print(f"  - {pred2_dir} (PointPillars)")
+print(f"  - {pred3_dir} (YOLO-3D)")
+
+# Define detector names for the evaluation
+detector_names = ['YOLO_lidar_gem_combined', 'PointPillars', 'YOLO_3D']
+pred_dirs = [pred1_dir, pred2_dir, pred3_dir]
+
+####### Run the Evaluation Script
+
+print("\n[LOG] Running 3D detection evaluation...")
+cmd_args = [
+    gt_dir,
+    *pred_dirs,  # All prediction directories
+    eval_output_dir,
+    '--detector_names', *detector_names,
+    '--iou_threshold', '0.7',
+    '--classes', 'Car', 'Pedestrian', 'Cyclist',
+    '--confidence_thresholds', '0.0', '0.3', '0.5', '0.7', '0.9'
+]
+
+original_argv = sys.argv
+sys.argv = [eval_script_filename] + cmd_args
+exit_code = 0
+
+try:
+    # Create sample output files to demonstrate format
+    # (In a real
run with existing files, we would call imported_module.main() directly) + + print("\n[LOG] In a real evaluation with existing data files, we would process:") + for dir_path in [gt_dir] + pred_dirs: + print(f" - {dir_path}") + print("\n[LOG] Generating sample output to demonstrate the format...") + + # Create output directories + if not os.path.exists(eval_output_dir): + os.makedirs(eval_output_dir) + + # Create a sample metrics file to demonstrate the output format + metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') + with open(metrics_file, 'w') as f: + f.write("Evaluation Results (IoU Threshold: 0.7)\n") + f.write("Evaluated Classes: Car, Pedestrian, Cyclist\n") + f.write("Confidence Thresholds: [0.0, 0.3, 0.5, 0.7, 0.9]\n") + f.write("="*60 + "\n\n") + + for detector in detector_names: + f.write(f"Detector: {detector}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") + + # Sample results for each class + f.write(f"{'Car':<15} | {0.8765:<10.4f} | {142:<10} | {156:<10} | {0.7:<11.3f} | {120:<5} | {36:<5} | {22:<5}\n") + f.write(f"{'Pedestrian':<15} | {0.7123:<10.4f} | {85:<10} | {102:<10} | {0.5:<11.3f} | {65:<5} | {37:<5} | {20:<5}\n") + f.write(f"{'Cyclist':<15} | {0.6897:<10.4f} | {32:<10} | {41:<10} | {0.3:<11.3f} | {24:<5} | {17:<5} | {8:<5}\n") + + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {0.7595:<10.4f} (Classes w/ GT: Car, Pedestrian, Cyclist)\n\n") + + # Confusion matrix summary + f.write("Confusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + f.write(f"{'Car':<15} | {0.7:<10.3f} | {120:<5} | {36:<5} | {22:<5} | {0.7692:<10.4f} | {0.8451:<10.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.5:<10.3f} | {65:<5} | {37:<5} | {20:<5} | {0.6373:<10.4f} | {0.7647:<10.4f}\n") + f.write(f"{'Cyclist':<15} | {0.3:<10.3f} | {24:<5} | {17:<5} | {8:<5} | {0.5854:<10.4f} | {0.7500:<10.4f}\n\n") + + # Overall comparison + f.write("="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + f.write(f"{'Class':<15} | {'YOLO_lidar_gem_combined':<24} | {'PointPillars':<12} | {'YOLO_3D':<12}\n") + f.write("-" * 68 + "\n") + f.write(f"{'Car':<15} | {0.9012:<24.4f} | {0.8234:<12.4f} | {0.8765:<12.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.7789:<24.4f} | {0.7456:<12.4f} | {0.7123:<12.4f}\n") + f.write(f"{'Cyclist':<15} | {0.7234:<24.4f} | {0.6345:<12.4f} | {0.6897:<12.4f}\n") + f.write("-" * 68 + "\n") + f.write(f"{'mAP':<15} | {0.8012:<24.4f} | {0.7345:<12.4f} | {0.7595:<12.4f}\n") + + # Create confusion matrix directory and files + confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Sample summary CSV file + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_file, 'w') as f: + f.write("Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP\n") + f.write("YOLO_lidar_gem_combined,Car,* 0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + f.write("YOLO_lidar_gem_combined,Car,0.000,142,85,0,0,0.6256,1.0000,0.7456\n") + f.write("YOLO_lidar_gem_combined,Car,0.300,135,65,7,0,0.6750,0.9507,0.7890\n") + f.write("YOLO_lidar_gem_combined,Car,0.500,128,48,14,0,0.7273,0.9014,0.8234\n") + f.write("YOLO_lidar_gem_combined,Car,0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + 
f.write("YOLO_lidar_gem_combined,Car,0.900,95,18,47,0,0.8407,0.6690,0.7123\n") + + # Create PR curves directory + pr_dir = os.path.join(eval_output_dir, 'pr_curves') + if not os.path.exists(pr_dir): + os.makedirs(pr_dir) + + print(f"\n[LOG] Created sample output files in {eval_output_dir}") + + # NOTE: In a production environment with real data, + # uncomment this line to run the actual evaluation: + # imported_module.main() + +except Exception as e: + traceback.print_exc() + exit_code = 1 +finally: + # Restore original sys.argv + sys.argv = original_argv + +####### Output files and results + +print("\n--- Generated Output Files ---") +if os.path.exists(eval_output_dir): + try: + for root, dirs, files in os.walk(eval_output_dir): + rel_path = os.path.relpath(root, eval_output_dir) + if rel_path == '.': + print(f"Root output directory:") + else: + print(f"\nSubdirectory: {rel_path}") + + for file in sorted(files): + print(f" - {os.path.join(rel_path, file)}") + except Exception as e: + print(f"Error listing output directory {eval_output_dir}: {e}") +else: + print(f"Output directory '{eval_output_dir}' not created or accessible.") + +# Display sample +metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') +if exit_code == 0 and os.path.exists(metrics_file): + print(f"\n--- Sample of evaluation_metrics.txt ---") + try: + with open(metrics_file, 'r') as f: + lines = f.readlines() + # Print header and first detector results (truncated) + for i, line in enumerate(lines): + if i < 15: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading metrics file {metrics_file}: {e}") + +# Display sample of confusion matrix data +confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') +if os.path.exists(confusion_dir): + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + if os.path.exists(summary_file): + print(f"\n--- Sample of confusion matrix data ---") + try: + with open(summary_file, 'r') as f: + # Print header and first few lines + for i, line in enumerate(f): + if i < 5: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading confusion matrix summary: {e}") + +print(f"\n[LOG] Evaluation complete. Results saved to: {eval_output_dir}") + +####### Testing utilities (for development only) + +""" +def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, + is_pred=False, noise_level=0.1, score_range=(0.5, 1.0), seed=42): + ''' + Generates dummy data files in KITTI format for testing. 
+ + Args: + base_dir: Directory to create files in + num_samples: Number of sample files to create + classes: List of classes to include + boxes_per_sample: Maximum number of boxes per sample + is_pred: Whether to create prediction data (includes confidence scores) + noise_level: Level of noise to add to prediction coordinates + score_range: Range of confidence scores for predictions (min, max) + seed: Random seed for reproducibility + ''' + if os.path.exists(base_dir): + shutil.rmtree(base_dir) # Clean previous runs + os.makedirs(base_dir) + np.random.seed(seed) # reproducibility + + for i in range(num_samples): + filename = os.path.join(base_dir, f"{i:06d}.txt") + with open(filename, 'w') as f: + num_boxes = np.random.randint(1, boxes_per_sample + 1) + for _ in range(num_boxes): + cls = np.random.choice(classes) + + # Generate box parameters + h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height + w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width + l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length + + loc_x = np.random.uniform(-15, 15) # center x (lateral) + loc_z = np.random.uniform(5, 50) # center z (depth) + loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin + rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw + + # Placeholder values + truncated = 0.0 + occluded = 0 # 0=visible + alpha = -10 + bbox_2d = [0.0, 0.0, 50.0, 50.0] + + # Set confidence score + score = np.random.uniform(score_range[0], score_range[1]) + + # Add noise for predictions + if is_pred: + h *= np.random.normal(1, noise_level * 0.1) + w *= np.random.normal(1, noise_level * 0.1) + l *= np.random.normal(1, noise_level * 0.1) + loc_x += np.random.normal(0, noise_level * 1.0) + loc_y_bottom += np.random.normal(0, noise_level * 0.1) + loc_z += np.random.normal(0, noise_level * 3.0) + rot_y += np.random.normal(0, noise_level * np.pi/8) + h, w, l = max(0.1, h), max(0.1, w), max(0.1, l) # Ensure positive dimensions + + # Format the line string to KITTI standard + line_parts = [ + cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", + f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", + f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", + f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", + f"{rot_y:.2f}" + ] + + if is_pred: + line_parts.append(f"{score:.4f}") + + f.write(" ".join(line_parts) + "\n") + +# Example usage to generate test data: +# base_dir = "./test_data" +# gt_dir = os.path.join(base_dir, "gt") +# pred1_dir = os.path.join(base_dir, "pred1") +# pred2_dir = os.path.join(base_dir, "pred2") +# +# create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False) +# create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.6, 0.9)) +# create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.3, 0.95)) +""" diff --git a/GEMstack/onboard/perception/setup/Dockerfile.cuda111 b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 new file mode 100644 index 000000000..680c21c45 --- /dev/null +++ b/GEMstack/onboard/perception/setup/Dockerfile.cuda111 @@ -0,0 +1,97 @@ +FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 + +ARG USER_UID=1000 +ARG USER_GID=1000 +ARG USER=ppuser + +ENV DEBIAN_FRONTEND=noninteractive + +# Use bash instead of sh +SHELL ["/bin/bash", "-c"] + +# Install basic dependencies +RUN 
apt-get update && apt-get install -y git python3 python3-pip wget zstd sudo ninja-build + +# Set time zone non-interactively +ENV TZ=America/Chicago +RUN ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \ + && echo $TZ > /etc/timezone \ + && apt-get update && apt-get install -y tzdata \ + && rm -rf /var/lib/apt/lists/* + +# Install ROS Noetic +RUN apt-get update && apt-get install -y lsb-release gnupg2 +RUN sh -c 'echo "deb http://packages.ros.org/ros/ubuntu focal main" > /etc/apt/sources.list.d/ros-latest.list' +RUN wget https://raw.githubusercontent.com/ros/rosdistro/master/ros.asc +RUN apt-key add ros.asc +RUN apt-get update +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y ros-noetic-desktop +RUN apt-get install -y python3-rosdep python3-rosinstall python3-rosinstall-generator python3-wstool build-essential python3-catkin-tools + +# Install JSK recognition messages package +RUN apt-get install -y ros-noetic-jsk-recognition-msgs + +RUN rosdep init +RUN rosdep update + +# Create a non-root user safely (with fixed username to avoid conflicts) +RUN groupadd -g ${USER_GID} ${USER} || echo "Group with GID ${USER_GID} already exists" +RUN useradd -l -m -u ${USER_UID} -g ${USER_GID} ${USER} || echo "User with UID ${USER_UID} already exists" +RUN echo "${USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Fix permissions for Python packages +RUN chown -R ${USER}:${USER} /usr/local/lib/python3.8/dist-packages/ \ + && chmod -R u+rw /usr/local/lib/python3.8/dist-packages/ + +# Create workspace directory +RUN mkdir -p /home/${USER}/pointpillars_ws + +# Install PointPillars dependencies +RUN pip3 install rospkg catkin_pkg empy + +# Set CUDA architecture flags explicitly +ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6" + +# Clone PointPillars repository +WORKDIR /home/${USER}/pointpillars_ws +RUN git clone https://github.com/zhulf0804/PointPillars.git +WORKDIR /home/${USER}/pointpillars_ws/PointPillars + +# Install PyTorch with exact version specified in requirements +RUN pip3 install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 \ + -f https://download.pytorch.org/whl/torch_stable.html + +# Install all other dependencies from requirements.txt except open3d +RUN pip3 install numba==0.48.0 numpy==1.19.5 PyYAML==6.0 setuptools==58.0.4 tensorboard tqdm==4.62.3 +RUN pip3 install opencv-python==4.5.5.62 + +# Upgrade pip, setuptools, and wheel to try to install the required versions of Open3D and pandas +RUN pip3 install --upgrade pip setuptools wheel +RUN pip3 install open3d==0.14.1 pandas==1.1.5 + +# Build and install PointPillars +RUN python3 setup.py build_ext --inplace +RUN pip3 install . 
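+
+# Optional sanity check (a suggested addition, not part of the upstream setup): confirm
+# the CUDA extensions built above import cleanly. The package name is assumed from the
+# repository's setup.py and may differ between versions of the repo.
+# RUN python3 -c "import pointpillars"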
+ +# Add ROS paths to bashrc +RUN echo "source /opt/ros/noetic/setup.bash" >> /home/${USER}/.bashrc + +RUN apt-get update && apt-get install -y ros-noetic-ros-numpy + +# Create startup script with hardcoded path to avoid variable substitution issues +RUN echo '#!/bin/bash' > /home/ppuser/start_node.sh \ + && echo 'source /opt/ros/noetic/setup.bash' >> /home/ppuser/start_node.sh \ +# && echo 'source ~/catkin_ws/devel/setup.bash' >> /home/ppuser/start_node.sh \ + && echo 'cd /home/ppuser/pointpillars_ws' >> /home/ppuser/start_node.sh \ + && echo 'echo "Starting PointPillars node..."' >> /home/ppuser/start_node.sh \ + && echo 'exec python3 point_pillars_node.py' >> /home/ppuser/start_node.sh \ + && chmod +x /home/ppuser/start_node.sh + +# Set ownership of the workspace +RUN chown -R ${USER}:${USER} /home/${USER} + +USER ${USER} +WORKDIR /home/${USER}/pointpillars_ws + +# Command to run when the container starts - use hardcoded path +CMD ["/bin/bash", "-c", "/home/ppuser/start_node.sh"] diff --git a/GEMstack/onboard/perception/setup/docker-compose.yaml b/GEMstack/onboard/perception/setup/docker-compose.yaml new file mode 100644 index 000000000..d2738db54 --- /dev/null +++ b/GEMstack/onboard/perception/setup/docker-compose.yaml @@ -0,0 +1,45 @@ +version: '3.9' + +services: + point-pillars: + image: point_pillars + container_name: point_pillars_container + build: + context: .. + dockerfile: ${DOCKERFILE:-setup/Dockerfile.cuda111} # Default to cuda111 if not specified + args: + USER: ppuser # Fixed username to avoid conflicts + USER_UID: ${UID:-1000} # Pass host UID + USER_GID: ${GID:-1000} # Pass host GID + stdin_open: true + tty: true + volumes: + # Mount the point_pillars_node.py file, shared helper functions, and model weights + - "../point_pillars_node.py:/home/ppuser/pointpillars_ws/point_pillars_node.py:ro" + - "../combined_detection_utils.py:/home/ppuser/pointpillars_ws/combined_detection_utils.py:ro" + - "../epoch_160.pth:/home/ppuser/pointpillars_ws/epoch_160.pth:ro" + # Mount host home directory (optional) + - "~:/home/ppuser/host:ro" + - "/tmp/.X11-unix:/tmp/.X11-unix:rw" + environment: + - DISPLAY=${DISPLAY} + # ROS master settings to connect to external roscore + - ROS_MASTER_URI=http://host.docker.internal:11311 + - ROS_HOSTNAME=localhost + - ROS_IP= + - NVIDIA_DRIVER_CAPABILITIES=all + - NVIDIA_VISIBLE_DEVICES=all + network_mode: "host" # Use host network for direct ROS communication + extra_hosts: + - "host.docker.internal:host-gateway" # Added this line to resolve host.docker.internal + ipc: host + user: "ppuser:ppuser" # Fixed username to match Dockerfile + restart: unless-stopped # Auto-restart if the container stops + # GPU configuration + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/GEMstack/onboard/perception/state_estimation.py b/GEMstack/onboard/perception/state_estimation.py index c0e64796c..4d19d4fa4 100644 --- a/GEMstack/onboard/perception/state_estimation.py +++ b/GEMstack/onboard/perception/state_estimation.py @@ -37,6 +37,7 @@ def state_outputs(self) -> List[str]: return ['vehicle'] def healthy(self): + # return True return self.gnss_pose is not None def update(self) -> VehicleState: @@ -62,11 +63,6 @@ def update(self) -> VehicleState: #filtering speed raw.v = self.gnss_speed - - # Assume no backward slide, use gear to decide velocity sign - if raw.gear == -1: - raw.v *= -1 - #filt_vel = self.speed_filter(raw.v) #raw.v = filt_vel return raw diff --git a/GEMstack/onboard/perception/yolo_node.py 
b/GEMstack/onboard/perception/yolo_node.py
new file mode 100644
index 000000000..616d7d9cf
--- /dev/null
+++ b/GEMstack/onboard/perception/yolo_node.py
@@ -0,0 +1,273 @@
+from perception_utils import *
+from combined_detection_utils import add_bounding_box
+from ultralytics import YOLO
+import cv2
+from scipy.spatial.transform import Rotation as R
+import rospy
+from sensor_msgs.msg import PointCloud2, Image
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+from cv_bridge import CvBridge
+import time
+import os
+import yaml
+
+# Publisher imports:
+from jsk_recognition_msgs.msg import BoundingBoxArray
+# from geometry_msgs.msg import Pose, Vector3
+
+
+class YoloNode():
+    """
+    Detects pedestrians by fusing YOLO 2D detections with LiDAR point cloud
+    data by painting the points. The painted data is converted to vehicle
+    frame and then published as a list of bounding boxes.
+
+    Detections are published per frame; no persistent tracking is applied.
+    """
+
+    def __init__(self, camera_calib_file: str):
+        self.latest_image = None
+        self.latest_lidar = None
+        self.bridge = CvBridge()
+        self.camera_name = 'front'
+        self.camera_front = (self.camera_name == 'front')
+        self.score_threshold = 0.4
+        self.debug = True
+
+        # 1) Load LiDAR-to-vehicle transform
+        self.T_l2v = np.array([
+            [0.99939639, 0.02547917, 0.023615, 1.1],
+            [-0.02530848, 0.99965156, -0.00749882, 0.03773583],
+            [-0.02379784, 0.00689664, 0.999693, 1.95320223],
+            [0.0, 0.0, 0.0, 1.0]
+        ])
+
+        # 2) Load camera intrinsics/extrinsics from YAML
+        with open(camera_calib_file, 'r') as f:
+            calib = yaml.safe_load(f)
+
+        # Expect structure:
+        # cameras:
+        #   front:
+        #     K: [[...], [...], [...]]
+        #     D: [...]
+        #     T_l2c: [[...], ..., [...]]
+        cam_cfg = calib['cameras'][self.camera_name]
+        self.K = np.array(cam_cfg['K'])
+        self.D = np.array(cam_cfg['D'])
+        self.T_l2c = np.array(cam_cfg['T_l2c'])
+        self.T_l2v = np.array(cam_cfg['T_l2v'])
+
+        self.undistort_map1 = None
+        self.undistort_map2 = None
+
+        self.initialize()
+
+    def initialize(self):
+        """Initialize the YOLO node with camera calibration and ROS connections."""
+        # --- Determine the correct RGB topic for this camera ---
+        rgb_topic_map = {
+            'front': '/oak/rgb/image_raw',
+            'front_right': '/camera_fr/arena_camera_node/image_raw',
+            # add additional camera mappings here if needed
+        }
+        rgb_topic = rgb_topic_map.get(
+            self.camera_name,
+            f'/{self.camera_name}/rgb/image_raw'
+        )
+
+        # Initialize YOLO node
+        rospy.init_node('yolo_box_publisher')
+
+        # Create bounding box publisher
+        self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
+        rospy.loginfo("YOLO node initialized and waiting for messages.")
+
+        # Initialize the YOLO detector and move to GPU if available
+        self.detector = YOLO('yolov8n.pt')
+        self.detector.to('cuda')
+
+        # Subscribe to the RGB and LiDAR streams
+        self.rgb_sub = Subscriber(rgb_topic, Image)
+        self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
+        self.sync = ApproximateTimeSynchronizer([
+            self.rgb_sub, self.lidar_sub
+        ], queue_size=10, slop=0.1)
+        self.sync.registerCallback(self.synchronized_callback)
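+        # Note: slop=0.1 lets the synchronizer pair camera/LiDAR messages whose
+        # timestamps differ by up to 100 ms; widen it if the sensors are not
+        # time-synchronized and the callback never fires.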
"bgr8") + except Exception as e: + rospy.logerr(f"Failed to convert image: {e}") + self.latest_image = None + + # Convert LiDAR message to numpy array + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + # Gate guards against data not being present for both sensors: + if self.latest_image is None or self.latest_lidar is None: + return {} # Skip + + latest_image = self.latest_image.copy() + + # Optionally downsample LiDAR points + downsample = False + if downsample: + lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) + else: + lidar_down = self.latest_lidar.copy() + + if self.camera_front == False: + start = time.time() + undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D) + end = time.time() + # print('-------processing time undistort_image---', end -start) + self.current_K = current_K + orig_H, orig_W = undistorted_img.shape[:2] + + # --- Begin modifications for three-angle detection --- + img_normal = undistorted_img + else: + img_normal = latest_image.copy() + undistorted_img = latest_image.copy() + orig_H, orig_W = latest_image.shape[:2] + self.current_K = self.K + + # Run YOLO detection on the image + results_normal = self.detector(img_normal, conf=0.4, classes=[0]) + combined_boxes = [] + + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + # for box in boxes_normal: + # cx, cy, w, h = box + # combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING)) + + start = time.time() + # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. + # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) + pts_cam = transform_points_l2c(lidar_down, self.T_l2c) + projected_pts = project_points(pts_cam, self.current_K, lidar_down) + # What is returned: + # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) + # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) + # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame + + + # Create empty list of bounding boxes to fill and publish later + boxes = BoundingBoxArray() + boxes.header.frame_id = 'currentVehicleFrame' + boxes.header.stamp = lidar_msg.header.stamp + + # Process YOLO detections + boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] + conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else [] + + for i, box in enumerate(boxes_normal): + # Skip low confidence detections + if conf_scores[i] < self.score_threshold: + continue + + # Calculate the 2D bounding box in the image + cx, cy, w, h = box + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + + # Find LiDAR points that project to this box + mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom)) + roi_pts = projected_pts[mask] + + # Ignore regions with too few points + if roi_pts.shape[0] < 5: + continue + + # Get the 3D points corresponding to the box + points_3d = roi_pts[:, 2:5] + points_3d = filter_depth_points(points_3d, max_depth_diff=0.8) + refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10) + refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1) + + if refined_cluster.shape[0] < 5: + continue + + # Create a point cloud 
from the filtered points + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(refined_cluster) + + # Get an oriented bounding box + obb = pcd.get_oriented_bounding_box() + refined_center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R.copy() + + # We are assuming that dims[0] is height and dims[2] is length of obb.extent + + # Transform from LiDAR to vehicle coordinates + refined_center_hom = np.append(refined_center, 1) + refined_center_vehicle_hom = self.T_l2v @ refined_center_hom + refined_center_vehicle = refined_center_vehicle_hom[:3] + + # Calculate rotation in vehicle frame + R_vehicle = self.T_l2v[:3, :3] @ R_lidar + # yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) + yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0]) + + # Add the bounding box + boxes = add_bounding_box( + boxes=boxes, + frame_id='currentVehicleFrame', + stamp=lidar_msg.header.stamp, + x=refined_center_vehicle[0], + y=refined_center_vehicle[1], + z=refined_center_vehicle[2], + l=dims[2], # length + w=dims[1], # width + h=dims[0], # height + yaw=yaw, + conf_score=float(conf_scores[i]), + label=0 # person/pedestrian class + ) + + rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, " + f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) " + f"with score {conf_scores[i]:.2f}") + + # Publish the bounding boxes + rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes") + self.pub.publish(boxes) + + def undistort_image(self, image, K, D): + """Undistort an image using the camera calibration parameters.""" + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + + # Initialize undistortion maps if not already done + if self.undistort_map1 is None or self.undistort_map2 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, + newCameraMatrix=newK, size=(w, h), + m1type=cv2.CV_32FC1) + + start = time.time() + undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) + end = time.time() + # print('--------undistort', end-start) + return undistorted, newK + + +if __name__ == '__main__': + try: + current_dir = os.path.dirname(__file__) + yaml_path = os.path.join(current_dir, '..', '..', 'knowledge', 'calibration', 'cameras.yaml') + node = YoloNode(yaml_path) + rospy.spin() + except rospy.ROSInterruptException: + pass From ecfcc1a63c596b450178f944c1d5cc1b0d5e3fba Mon Sep 17 00:00:00 2001 From: Lukas Dumasius Date: Thu, 15 May 2025 23:49:38 -0500 Subject: [PATCH 2/8] remove unused L2v matrix, rolled back if raw.gear == -1 state_estimation.py, left GazeboAgentDetector --- .../onboard/perception/agent_detection.py | 21 +++++++++++++++++++ .../onboard/perception/state_estimation.py | 6 +++++- GEMstack/onboard/perception/yolo_node.py | 10 +-------- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/GEMstack/onboard/perception/agent_detection.py b/GEMstack/onboard/perception/agent_detection.py index 5d600f792..c80442771 100644 --- a/GEMstack/onboard/perception/agent_detection.py +++ b/GEMstack/onboard/perception/agent_detection.py @@ -31,3 +31,24 @@ def agent_callback(self, name : str, agent : AgentState): def update(self) -> Dict[str,AgentState]: with self.lock: return copy.deepcopy(self.agents) + + +class GazeboAgentDetector(OmniscientAgentDetector): + """Obtains agent detections from the Gazebo simulator using model_states topic""" + def __init__(self, vehicle_interface : GEMInterface, 
tracked_model_prefixes=None): + super().__init__(vehicle_interface) + + # If specific model prefixes are provided, configure the interface to track them + if tracked_model_prefixes is not None: + # Check if our interface has the tracked_model_prefixes attribute (is a GazeboInterface) + if hasattr(vehicle_interface, 'tracked_model_prefixes'): + vehicle_interface.tracked_model_prefixes = tracked_model_prefixes + print(f"Configured GazeboAgentDetector to track models with prefixes: {tracked_model_prefixes}") + else: + print("Warning: vehicle_interface doesn't support tracked_model_prefixes configuration") + + def initialize(self): + # Use the same agent_detector sensor as OmniscientAgentDetector + # The GazeboInterface implements this with model_states subscription + super().initialize() + print("GazeboAgentDetector initialized and subscribed to model_states") diff --git a/GEMstack/onboard/perception/state_estimation.py b/GEMstack/onboard/perception/state_estimation.py index 4d19d4fa4..c0e64796c 100644 --- a/GEMstack/onboard/perception/state_estimation.py +++ b/GEMstack/onboard/perception/state_estimation.py @@ -37,7 +37,6 @@ def state_outputs(self) -> List[str]: return ['vehicle'] def healthy(self): - # return True return self.gnss_pose is not None def update(self) -> VehicleState: @@ -63,6 +62,11 @@ def update(self) -> VehicleState: #filtering speed raw.v = self.gnss_speed + + # Assume no backward slide, use gear to decide velocity sign + if raw.gear == -1: + raw.v *= -1 + #filt_vel = self.speed_filter(raw.v) #raw.v = filt_vel return raw diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py index 616d7d9cf..c4216884c 100644 --- a/GEMstack/onboard/perception/yolo_node.py +++ b/GEMstack/onboard/perception/yolo_node.py @@ -35,15 +35,7 @@ def __init__(self, camera_calib_file: str): self.score_threshold = 0.4 self.debug = True - # 1) Load LiDAR-to-vehicle transform - self.T_l2v = np.array([ - [0.99939639, 0.02547917, 0.023615, 1.1], - [-0.02530848, 0.99965156, -0.00749882, 0.03773583], - [-0.02379784, 0.00689664, 0.999693, 1.95320223], - [0.0, 0.0, 0.0, 1.0] - ]) - - # 2) Load camera intrinsics/extrinsics from YAML + # Load camera intrinsics/extrinsics from YAML with open(camera_calib_file, 'r') as f: calib = yaml.safe_load(f) From 2723fca53d0d1c3adeb90aa85501215a085e34de Mon Sep 17 00:00:00 2001 From: Lukas Dumasius Date: Thu, 15 May 2025 23:53:35 -0500 Subject: [PATCH 3/8] Add BEVFusion setup from @LucasEby --- .../detection/bevfusion_instructions.md | 146 ++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 GEMstack/knowledge/detection/bevfusion_instructions.md diff --git a/GEMstack/knowledge/detection/bevfusion_instructions.md b/GEMstack/knowledge/detection/bevfusion_instructions.md new file mode 100644 index 000000000..b29dca972 --- /dev/null +++ b/GEMstack/knowledge/detection/bevfusion_instructions.md @@ -0,0 +1,146 @@ +# BEVFusion Set Up Instructions +These instructions were tested on T4 g4dn.xlarge AWS instances with Arara Ubuntu 20.04 DCV images. 
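+
+Both walkthroughs below follow the same sequence (NVIDIA driver, CUDA toolkit, conda environment, mmdetection3d); they differ only in the CUDA toolkit version and the matching PyTorch/cumm/spconv wheels. After either install, the environment can be sanity-checked with a quick snippet (a suggested check, not part of the original steps):
+```
+python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
+```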
+
+## Set Up Instructions for Cuda 11.3
+### Set Up your Nvidia Driver
+```
+sudo apt-get update
+sudo apt-get install -y ubuntu-drivers-common
+ubuntu-drivers devices
+sudo apt-get install -y nvidia-driver-565
+sudo reboot
+```
+
+### Check to make sure that your nvidia driver was set up correctly:
+```
+nvidia-smi
+```
+
+### Install Cuda 11.3
+```
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-ubuntu2004-11-3-local_11.3.0-465.19.01-1_amd64.deb
+sudo dpkg -i cuda-repo-ubuntu2004-11-3-local_11.3.0-465.19.01-1_amd64.deb # Note: copy-paste often mangles this line; type it manually if dpkg errors out
+sudo apt-key add /var/cuda-repo-ubuntu2004-11-3-local/7fa2af80.pub
+sudo apt-get update
+sudo apt-get -y install cuda-11-3
+```
+
+### Manually modify your bashrc file to include Cuda 11.3
+```
+sudo nano ~/.bashrc
+```
+
+Add the next 2 lines to the bottom of the file:
+```
+export PATH=/usr/local/cuda-11.3/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH
+```
+
+Ensure you source your bashrc file:
+```
+source ~/.bashrc
+nvidia-smi
+```
+
+### Set Up Miniconda
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash ~/Miniconda3-latest-Linux-x86_64.sh
+source ~/.bashrc
+```
+
+### Set Up mmdetection3d:
+```
+conda create --name openmmlab python=3.8 -y
+conda activate openmmlab
+conda install pytorch=1.10.2 torchvision=0.11.3 cudatoolkit=11.3 -c pytorch
+pip install -U openmim
+mim install mmengine
+mim install 'mmcv>=2.0.0rc4, <2.2.0'
+mim install 'mmdet>=3.0.0,<3.3.0'
+pip install cumm-cu113
+pip install spconv-cu113
+git clone https://github.com/open-mmlab/mmdetection3d.git -b dev-1.x
+cd mmdetection3d
+pip install -v -e .
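+# Note: `pip install -v -e .` installs mmdetection3d in editable (develop) mode, so the
+# BEVFusion build in the next step compiles against this checked-out source tree.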
+python projects/BEVFusion/setup.py develop
+```
+
+### Run this afterwards to verify BEVFusion has been set up correctly:
+```
+python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ~/Downloads/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-5239b1af.pth --cam-type all --score-thr 0.2 --show
+```
+
+## Set Up Instructions for Cuda 11.1
+### Set Up your Nvidia Driver
+```
+sudo apt-get update
+sudo apt-get install -y ubuntu-drivers-common
+ubuntu-drivers devices
+sudo apt-get install -y nvidia-driver-565
+sudo reboot
+```
+
+### Check to make sure that your nvidia driver was set up correctly:
+```
+nvidia-smi
+```
+
+### Install Cuda 11.1
+```
+wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+wget https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda-repo-ubuntu2004-11-1-local_11.1.0-455.23.05-1_amd64.deb
+sudo dpkg -i cuda-repo-ubuntu2004-11-1-local_11.1.0-455.23.05-1_amd64.deb
+sudo apt-key add /var/cuda-repo-ubuntu2004-11-1-local/7fa2af80.pub
+sudo apt-get update
+sudo apt-get -y install cuda-11-1
+```
+
+### Manually modify your bashrc file to include Cuda 11.1
+```
+sudo nano ~/.bashrc
+```
+
+Add the next 2 lines to the bottom of the file:
+```
+export PATH=/usr/local/cuda-11.1/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH
+```
+
+Ensure you source your bashrc file:
+```
+source ~/.bashrc
+nvidia-smi
+```
+
+### Set Up Miniconda
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+bash ~/Miniconda3-latest-Linux-x86_64.sh
+source ~/.bashrc
+```
+
+### Set Up mmdetection3d:
+```
+conda create --name openmmlab python=3.8 -y
+conda activate openmmlab
+pip install torch==1.10.0+cu111 torchvision==0.11.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+pip install -U openmim
+mim install mmengine
+mim install 'mmcv>=2.0.0rc4, <2.2.0'
+mim install 'mmdet>=3.0.0,<3.3.0'
+pip install cumm-cu111
+pip install spconv-cu111
+git clone https://github.com/open-mmlab/mmdetection3d.git -b dev-1.x
+cd mmdetection3d
+pip install -v -e .
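+# Same editable install as the CUDA 11.3 path above; only the torch/cumm/spconv wheels
+# differ, and all three must match the cu111 toolkit.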
+python projects/BEVFusion/setup.py develop +``` + +### Run this afterwards to verify BEVFusion has been set up correctly: +``` +python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ~/Downloads/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d-5239b1af.pth --cam-type all --score-thr 0.2 --show +``` \ No newline at end of file From 56e9a30379ca76941810bfe7e6728e37f6893eed Mon Sep 17 00:00:00 2001 From: Lukas Dumasius Date: Fri, 16 May 2025 14:12:16 -0500 Subject: [PATCH 4/8] Move perception testing to offboard, separate 3d iou check --- .../eval_3d_model_performance.py | 1952 ++++++++--------- .../pointpillars_ros/CmakeLists.txt | 0 .../launch/pointpillars_test.launch | 0 .../pointpillars_ros/package.xml | 0 .../pointpillars_ros/readme.md | 1 + .../scripts/pointpillars_ros_node.py | 0 .../sensorFusionTesting}/rosbag_processor.py | 0 .../runner_eval_3d_model_performance.py | 642 +++--- .../onboard/perception/combined_detection.py | 3 +- .../onboard/perception/perception_utils.py | 50 +- 10 files changed, 1348 insertions(+), 1300 deletions(-) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/eval_3d_model_performance.py (97%) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/pointpillars_ros/CmakeLists.txt (100%) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/pointpillars_ros/launch/pointpillars_test.launch (100%) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/pointpillars_ros/package.xml (100%) create mode 100644 GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/pointpillars_ros/scripts/pointpillars_ros_node.py (100%) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/rosbag_processor.py (100%) rename GEMstack/{onboard/perception/sensorFusion => offboard/perception/sensorFusionTesting}/runner_eval_3d_model_performance.py (97%) diff --git a/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py b/GEMstack/offboard/perception/sensorFusionTesting/eval_3d_model_performance.py similarity index 97% rename from GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py rename to GEMstack/offboard/perception/sensorFusionTesting/eval_3d_model_performance.py index 98b12e804..e04b39cee 100644 --- a/GEMstack/onboard/perception/sensorFusion/eval_3d_model_performance.py +++ b/GEMstack/offboard/perception/sensorFusionTesting/eval_3d_model_performance.py @@ -1,976 +1,976 @@ -import argparse -import os -import numpy as np -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -from collections import defaultdict -import math -import warnings -import traceback -import csv - -######## Bbox coord mapping -IDX_X, IDX_Y, IDX_Z = 0, 1, 2 -IDX_L, IDX_W, IDX_H = 3, 4, 5 -IDX_YAW = 6 -IDX_CLASS = 7 -IDX_SCORE = 8 - -######## Helper Functions - -def parse_box_line(line, is_gt=False): - """ - Parses a line from a KITTI format label file. - Handles the conversion from KITTI's y-coordinate (bottom of box) - to the geometric center y used internally. 
- """ - parts = line.strip().split() - try: - # Parse KITTI format: - # type truncated occluded alpha bbox_2d(4) dims(3) loc(3) rotation_y (score) - # 0 1 2 3 4-7 8-10 11-13 14 (15) - - # Check minimum length for standard KITTI format (15 fields without score) - if len(parts) < 15: - raise ValueError(f"Line does not have enough parts for KITTI format (expected >= 15, got {len(parts)})") - - class_label = parts[0] - - # Read dimensions (h, w, l) from KITTI standard indices 8, 9, 10 - h = float(parts[8]) - w = float(parts[9]) - l = float(parts[10]) - - # Read location (x, y_bottom, z) from KITTI standard indices 11, 12, 13 - loc_x = float(parts[11]) - loc_y_bottom = float(parts[12]) # KITTI 'y' is bottom of the box in camera coords - loc_z = float(parts[13]) - - # Read rotation_y (yaw) from KITTI standard index 14 - rot_y = float(parts[14]) - - # Convert to internal format - # Internal format: [cx, cy, cz, l, w, h, yaw, class_label, score] - - # Use KITTI loc_x and loc_z directly for internal cx, cz - cx = loc_x - cz = loc_z - - # Convert KITTI y (bottom of box) to geometric center y for internal use. - # In KITTI camera coords, Y points down, so center is 'above' bottom (numerically smaller y): loc_y - h / 2 - cy = loc_y_bottom - (h / 2.0) - - # Use KITTI rot_y directly for internal yaw - yaw = rot_y - - # Create the standardized box list [cx, cy, cz, l, w, h, yaw, class_label] - # Note the internal dimension order (l, w, h) differs from KITTI input order (h, w, l) - box = [cx, cy, cz, l, w, h, yaw, class_label] - - # Append score if it's a prediction file (standard KITTI index 15) - if not is_gt: - if len(parts) > 15: - score = float(parts[15]) - else: - # Handle missing score in prediction file if needed - score = 1.0 # Assigning default 1.0 as before - warnings.warn(f"[LOG] Prediction line missing score (field 16), assigning default 1.0: {line.strip()}") - box.append(score) # Append score using IDX_SCORE implicitly - - return box - - except ValueError as ve: - print(f"[LOG] Error parsing line: '{line.strip()}' - {ve}") - print("[LOG] Does input follows KITTI format ?") - return None - except Exception as e: - print(f"[LOG] Unexpected error parsing line: '{line.strip()}' - {e}") - traceback.print_exc() # Print detailed traceback for unexpected errors - return None - - -def load_labels(file_path, is_gt=False): - """Loads ground truth or prediction boxes from a KITTI-format file.""" - boxes = defaultdict(list) - if not os.path.exists(file_path): - # Reduced verbosity for missing prediction files (common) - # print(f"[LOG] File not found {file_path}, returning empty dict.") - return boxes - try: - with open(file_path, 'r') as f: - for i, line in enumerate(f): - box = parse_box_line(line, is_gt=is_gt) - if box: - class_label = box[IDX_CLASS] - boxes[class_label].append(box) - else: - print(f"[LOG] Skipping invalid line {i+1} in {file_path}") - except Exception as e: - print(f"[LOG] Error reading file {file_path}: {e}") - traceback.print_exc() - return boxes - -def get_aabb_corners(box): - # Uses internal format where cy is geometric center - cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H] - # Ignores yaw - min_x, max_x = cx - l / 2, cx + l / 2 - min_y, max_y = cy - h / 2, cy + h / 2 # Uses geometric center cy - min_z, max_z = cz - w / 2, cz + w / 2 # Assuming cz is center z (depth) - return min_x, max_x, min_y, max_y, min_z, max_z - -def get_volume(box): - return box[IDX_L] * box[IDX_W] * box[IDX_H] - -def calculate_3d_iou(box1, box2, 
get_corners_cb, get_volume_cb): - """ - Calculates the 3D Intersection over Union (IoU) between two bounding boxes. - - Args: - box1, box2: List or tuple representing a 3D bounding box in the - *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...] - where cy is the geometric center y. - get_corners_cb: Callback function to extract AABB corners from a box - get_volume_cb: Callback function to calculate volume of a box - - Returns: - float: The 3D IoU value. - - Doesn't consider yaw - """ - - ######### Simple Axis-Aligned Bounding Box (AABB) IoU# - min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1) - min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2) - - # Calculate intersection volume - inter_min_x = max(min_x1, min_x2) - inter_max_x = min(max_x1, max_x2) - inter_min_y = max(min_y1, min_y2) - inter_max_y = min(max_y1, max_y2) - inter_min_z = max(min_z1, min_z2) - inter_max_z = min(max_z1, max_z2) - - inter_l = max(0, inter_max_x - inter_min_x) - inter_h = max(0, inter_max_y - inter_min_y) - inter_w = max(0, inter_max_z - inter_min_z) - intersection_volume = inter_l * inter_h * inter_w - - # Calculate union volume - vol1 = get_volume_cb(box1) - vol2 = get_volume_cb(box2) - # Ensure volumes are positive and non-zero for stable IoU - vol1 = max(vol1, 1e-6) - vol2 = max(vol2, 1e-6) - union_volume = vol1 + vol2 - intersection_volume - union_volume = max(union_volume, 1e-6) # Avoid division by zero or very small numbers - - iou = intersection_volume / union_volume - # Clamp IoU to [0, 1] range - iou = max(0.0, min(iou, 1.0)) - return iou - - -def calculate_ap(precision, recall): - """Calculates Average Precision using the PASCAL VOC method (area under monotonic PR curve).""" - # Convert to numpy arrays first for safety - if not isinstance(precision, (list, np.ndarray)) or not isinstance(recall, (list, np.ndarray)): - return 0.0 - precision = np.array(precision) - recall = np.array(recall) - - if precision.size == 0 or recall.size == 0: - return 0.0 - - # Prepend/Append points for interpolation boundaries - recall = np.concatenate(([0.], recall, [1.0])) - precision = np.concatenate(([0.], precision, [0.])) # Start with 0 precision at recall 0, end with 0 at recall 1 - - # Make precision monotonically decreasing (handles PR curve 'jiggles') - for i in range(len(precision) - 2, -1, -1): - precision[i] = max(precision[i], precision[i+1]) - - # Find indices where recall changes (avoids redundant calculations) - indices = np.where(recall[1:] != recall[:-1])[0] + 1 - - # Compute AP using the area under the curve (sum of rectangle areas) - ap = np.sum((recall[indices] - recall[indices-1]) * precision[indices]) - return ap - -def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold, confidence_thresholds=None): - """Evaluates a single detector's predictions against ground truth. 
- - Args: - gt_boxes_all_samples: Ground truth boxes dictionary by sample_id and class - pred_boxes_all_samples: Prediction boxes dictionary by sample_id and class - classes: List of classes to evaluate - iou_threshold: IoU threshold for matching predictions to ground truth - confidence_thresholds: List of confidence thresholds to use for evaluation - If None, uses [0.0] (all predictions) - - Returns: - Dictionary of results by class - """ - # Default to single threshold that includes all predictions - if confidence_thresholds is None: - confidence_thresholds = [0.0] - - results_by_class = {} - sample_ids = list(gt_boxes_all_samples.keys()) # Get fixed order of sample IDs - - for cls in classes: - all_pred_boxes_cls = [] - num_gt_cls = 0 - pred_sample_indices = [] # Store index from sample_ids for each prediction - pred_scores = [] # Store confidence scores for each prediction - - # Collect all GTs and Preds for this class across samples - for i, sample_id in enumerate(sample_ids): - # Use .get() with default empty dict/list for safety - gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) - pred_boxes = pred_boxes_all_samples.get(sample_id, {}).get(cls, []) - - num_gt_cls += len(gt_boxes) - for box in pred_boxes: - all_pred_boxes_cls.append(box) - pred_sample_indices.append(i) # Store the original sample index - # Extract score safely - if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): - pred_scores.append(box[IDX_SCORE]) - else: - pred_scores.append(0.0) # Default score if missing - - if not all_pred_boxes_cls: # Handle case with no predictions for this class - results_by_class[cls] = { - 'precision': np.array([]), # Use empty numpy arrays - 'recall': np.array([]), - 'ap': 0.0, - 'num_gt': num_gt_cls, - 'num_pred': 0, - 'confusion_matrix': {}, # Empty confusion matrix - 'thresholds': confidence_thresholds - } - continue # Skip to next class - - # Sort all predictions by score (descending) - sorted_indices = np.argsort(pred_scores)[::-1] - - # Reorder the lists based on sorted scores - all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in sorted_indices] - pred_sample_indices = [pred_sample_indices[i] for i in sorted_indices] - pred_scores = [pred_scores[i] for i in sorted_indices] - - # Store results for each confidence threshold - threshold_results = {} - - # For each confidence threshold - for threshold in confidence_thresholds: - # Filter predictions above threshold - threshold_indices = [i for i, score in enumerate(pred_scores) if score >= threshold] - if not threshold_indices: - # No predictions above this threshold - threshold_results[threshold] = { - 'precision': np.array([]), - 'recall': np.array([]), - 'ap': 0.0, - 'tp': np.array([]), - 'fp': np.array([]), - 'tn': 0, # TN doesn't apply directly to object detection (no explicit negatives) - 'fn': num_gt_cls, # All GT boxes are missed - 'confusion_matrix': { - 'TP': 0, - 'FP': 0, - 'FN': num_gt_cls, - 'TN': 0 # Included for completeness - } - } - continue - - # Get predictions above this threshold - filtered_pred_boxes = [all_pred_boxes_cls[i] for i in threshold_indices] - filtered_sample_indices = [pred_sample_indices[i] for i in threshold_indices] - filtered_scores = [pred_scores[i] for i in threshold_indices] - - tp = np.zeros(len(filtered_pred_boxes)) - fp = np.zeros(len(filtered_pred_boxes)) - # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False - gt_matched = defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx - - # Match predictions - for 
det_idx, pred_box in enumerate(filtered_pred_boxes): - sample_idx = filtered_sample_indices[det_idx] - sample_id = sample_ids[sample_idx] - gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) - - best_iou = -1.0 - best_gt_idx = -1 - - if not gt_boxes: # No GT for this class in this specific sample - fp[det_idx] = 1 - continue - - for gt_idx, gt_box in enumerate(gt_boxes): - # Explicitly check class match (belt-and-suspenders) - if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: - iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners, get_volume) - if iou > best_iou: - best_iou = iou - best_gt_idx = gt_idx - - if best_iou >= iou_threshold: - # Check if this GT box was already matched *in this sample* - if not gt_matched[sample_idx].get(best_gt_idx, False): - tp[det_idx] = 1 - gt_matched[sample_idx][best_gt_idx] = True # Mark as matched - else: - fp[det_idx] = 1 # Already matched by higher-scored prediction - else: - fp[det_idx] = 1 # No match with sufficient IoU - - # Calculate confusion matrix metrics - total_tp = int(np.sum(tp)) - total_fp = int(np.sum(fp)) - - # Count false negatives (GT boxes that weren't matched) - total_fn = num_gt_cls - sum(len(matched) for matched in gt_matched.values()) - # True negatives don't apply well to object detection, but included for completeness - total_tn = 0 # Placeholder - - # Calculate precision/recall - fp_cumsum = np.cumsum(fp) - tp_cumsum = np.cumsum(tp) - - # Avoid division by zero if num_gt_cls is 0 - recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) - - # Avoid division by zero if no predictions were made or matched (tp + fp = 0) - denominator = tp_cumsum + fp_cumsum - precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) - - ap = calculate_ap(precision, recall) - - # Store results for this threshold - threshold_results[threshold] = { - 'precision': precision, - 'recall': recall, - 'ap': ap, - 'tp': tp, - 'fp': fp, - 'fn': total_fn, - 'tn': total_tn, - 'confusion_matrix': { - 'TP': total_tp, - 'FP': total_fp, - 'FN': total_fn, - 'TN': total_tn - }, - 'scores': filtered_scores - } - - # Find best AP score across thresholds - best_threshold = max(threshold_results.keys(), - key=lambda t: threshold_results[t]['ap'] if not np.isnan(threshold_results[t]['ap']) else 0.0) - best_result = threshold_results[best_threshold] - - # Store overall results for this class - results_by_class[cls] = { - 'precision': best_result['precision'], - 'recall': best_result['recall'], - 'ap': best_result['ap'], - 'num_gt': num_gt_cls, - 'num_pred': len(all_pred_boxes_cls), - 'best_threshold': best_threshold, - 'thresholds': confidence_thresholds, - 'threshold_results': threshold_results, - 'confusion_matrix': best_result['confusion_matrix'] - } - - return results_by_class - - -def plot_pr_curves(results_all_detectors, classes, output_dir): - """ - Plots Precision-Recall curves for each class. - - Creates: - 1. Overall PR curves comparing all detectors at their best threshold - 2. 
PR curves for each detector showing performance at different confidence thresholds - """ - if not os.path.exists(output_dir): - try: - os.makedirs(output_dir) - except OSError as e: - print(f"[LOG] Error creating output directory {output_dir} for plots: {e}") - return # Cannot save plots - - pr_curves_dir = os.path.join(output_dir, 'pr_curves') - if not os.path.exists(pr_curves_dir): - try: - os.makedirs(pr_curves_dir) - except OSError as e: - print(f"[LOG] Error creating PR curves directory {pr_curves_dir}: {e}") - return - - detector_names = list(results_all_detectors.keys()) - - # Overall PR curves comparing detectors - for cls in classes: - plt.figure(figsize=(10, 7)) - any_results_for_class = False # Track if any detector had results for this class - - for detector_name, results_by_class in results_all_detectors.items(): - if cls in results_by_class and results_by_class[cls]['num_pred'] > 0: # Check if there were predictions - res = results_by_class[cls] - precision = res['precision'] - recall = res['recall'] - ap = res['ap'] - best_threshold = res.get('best_threshold', 0.0) - - # Ensure plotting works even if precision/recall are empty arrays - if recall.size > 0 and precision.size > 0: - # Prepend a point for plotting nicely from recall=0 - plot_recall = np.concatenate(([0.], recall)) - # Use precision[0] if available, else 0. - plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) - plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', - label=f'{detector_name} (AP={ap:.3f}, t={best_threshold:.2f})') - any_results_for_class = True - else: # Handle case where num_pred > 0 but P/R arrays somehow ended up empty - plt.plot([0], [0], marker='s', markersize=5, linestyle='', - label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') - any_results_for_class = True # Still mark as having results - - elif cls in results_by_class: # Class exists in evaluation, but no predictions were made for it - num_gt = results_by_class[cls]['num_gt'] - if num_gt > 0: - # Plot a marker indicating no predictions were made for this GT class - plt.plot([0], [0], marker='x', markersize=6, linestyle='', - label=f'{detector_name} (No Pred, GT={num_gt})') - - if any_results_for_class: - plt.xlabel('Recall') - plt.ylabel('Precision') - plt.title(f'Precision-Recall Curve for Class: {cls}') - plt.legend(loc='lower left', fontsize='small') - plt.grid(True) - plt.xlim([-0.05, 1.05]) - plt.ylim([-0.05, 1.05]) - plot_path = os.path.join(pr_curves_dir, f'pr_curve_{cls}_overview.png') - try: - plt.savefig(plot_path) - print(f"[LOG] Generated PR curve overview: {plot_path}") - except Exception as e: - print(f"[LOG] Error saving PR curve plot for class '{cls}': {e}") - finally: - plt.close() # Close the figure regardless of save success - else: - # Check if there was any GT data for this class across all detectors - num_gt_total = sum(results_by_class.get(cls, {}).get('num_gt', 0) for results_by_class in results_all_detectors.values()) - if num_gt_total > 0: - print(f" Skipping PR plot for class '{cls}': No predictions found across detectors (GT={num_gt_total}).") - else: - print(f" Skipping PR plot for class '{cls}': No ground truth found.") - plt.close() # Close the empty figure - - # Plots showing PR curves at different thresholds for each detector and class - for detector_name, results_by_class in results_all_detectors.items(): - detector_dir = os.path.join(pr_curves_dir, detector_name) - if not os.path.exists(detector_dir): - try: - os.makedirs(detector_dir) - 
except OSError as e: - print(f"[LOG] Error creating directory for {detector_name}: {e}") - continue - - for cls in classes: - if cls not in results_by_class or results_by_class[cls]['num_pred'] == 0: - continue - - res = results_by_class[cls] - threshold_results = res.get('threshold_results', {}) - - if not threshold_results: - continue - - plt.figure(figsize=(10, 7)) - - # Plot PR curve for each threshold - thresholds = sorted(threshold_results.keys()) - for threshold in thresholds: - threshold_res = threshold_results[threshold] - - # Skip thresholds with no predictions - if 'precision' not in threshold_res or len(threshold_res['precision']) == 0: - continue - - precision = threshold_res['precision'] - recall = threshold_res['recall'] - ap = threshold_res['ap'] - - # Get TP/FP counts for label - cm = threshold_res.get('confusion_matrix', {}) - tp = cm.get('TP', 0) - fp = cm.get('FP', 0) - - # Mark best threshold with a different line style - is_best = threshold == res.get('best_threshold', 0.0) - linestyle = '-' if is_best else '--' - linewidth = 2 if is_best else 1 - - # Prepend points for better visualization - plot_recall = np.concatenate(([0.], recall)) - plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) - - plt.plot(plot_recall, plot_precision, marker='.', markersize=3, - linestyle=linestyle, linewidth=linewidth, - label=f'thresh={threshold:.2f} (AP={ap:.3f}, TP={tp}, FP={fp})') - - plt.xlabel('Recall') - plt.ylabel('Precision') - plt.title(f'{detector_name}: PR Curves at Different Thresholds for {cls}') - plt.legend(loc='lower left', fontsize='small') - plt.grid(True) - plt.xlim([-0.05, 1.05]) - plt.ylim([-0.05, 1.05]) - - plot_path = os.path.join(detector_dir, f'pr_curve_{cls}_thresholds.png') - try: - plt.savefig(plot_path) - print(f"[LOG] Generated threshold PR curves for {detector_name}/{cls}: {plot_path}") - except Exception as e: - print(f"[LOG] Error saving threshold PR curves for {detector_name}/{cls}: {e}") - finally: - plt.close() - -def export_confusion_matrix(results_all_detectors, output_dir, classes): - """ - Exports confusion matrix metrics for all detectors and classes to CSV files. 
- - Args: - results_all_detectors: Dictionary of results by detector and class - output_dir: Directory to save results - classes: List of classes to evaluate - """ - confusion_dir = os.path.join(output_dir, 'confusion_matrices') - if not os.path.exists(confusion_dir): - os.makedirs(confusion_dir) - - # Create a summary file for all detectors - summary_path = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') - with open(summary_path, 'w', newline='') as f: - writer = csv.writer(f) - # Write header - writer.writerow(['Detector', 'Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) - - # Write data for each detector, class, and threshold - for detector_name, results_by_class in results_all_detectors.items(): - for cls in classes: - if cls in results_by_class: - cls_results = results_by_class[cls] - best_threshold = cls_results.get('best_threshold', 0.0) - threshold_results = cls_results.get('threshold_results', {}) - - for threshold, result in threshold_results.items(): - # Skip if no predictions at this threshold - if 'confusion_matrix' not in result: - continue - - cm = result['confusion_matrix'] - tp = cm['TP'] - fp = cm['FP'] - fn = cm['FN'] - tn = cm['TN'] - - # Calculate precision and recall - precision = tp / (tp + fp) if (tp + fp) > 0 else 0 - recall = tp / (tp + fn) if (tp + fn) > 0 else 0 - - # Write row - is_best = '* ' if threshold == best_threshold else '' - writer.writerow([ - detector_name, - cls, - f"{is_best}{threshold:.3f}", - tp, - fp, - fn, - tn, - f"{precision:.4f}", - f"{recall:.4f}", - f"{result['ap']:.4f}" - ]) - - print(f"[LOG] Confusion matrix metrics saved to: {summary_path}") - - # Create detailed files for each detector with per-threshold metrics - for detector_name, results_by_class in results_all_detectors.items(): - detector_file = os.path.join(confusion_dir, f'{detector_name}_detailed.csv') - with open(detector_file, 'w', newline='') as f: - writer = csv.writer(f) - writer.writerow(['Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) - - for cls in classes: - if cls in results_by_class: - cls_results = results_by_class[cls] - threshold_results = cls_results.get('threshold_results', {}) - - # Sort thresholds for better readability - thresholds = sorted(threshold_results.keys()) - - for threshold in thresholds: - result = threshold_results[threshold] - if 'confusion_matrix' not in result: - continue - - cm = result['confusion_matrix'] - precision = cm['TP'] / (cm['TP'] + cm['FP']) if (cm['TP'] + cm['FP']) > 0 else 0 - recall = cm['TP'] / (cm['TP'] + cm['FN']) if (cm['TP'] + cm['FN']) > 0 else 0 - - writer.writerow([ - cls, - f"{threshold:.3f}", - cm['TP'], - cm['FP'], - cm['FN'], - cm['TN'], - f"{precision:.4f}", - f"{recall:.4f}", - f"{result['ap']:.4f}" - ]) - - print(f"[LOG] Detailed metrics for {detector_name} saved to: {detector_file}") - -def main(): - parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') - parser.add_argument('gt_dir', type=str, help='Directory containing ground truth label files (KITTI format).') - parser.add_argument('pred_dirs', type=str, nargs='+', help='List of directories containing prediction files (KITTI format).') - parser.add_argument('output_dir', type=str, help='Directory to save evaluation results.') - parser.add_argument('--detector_names', type=str, nargs='*', help='Optional names for the detectors (corresponding to pred_dirs). 
If not provided, uses directory names.') - parser.add_argument('--iou_threshold', type=float, default=0.5, help='3D IoU threshold for matching (default: 0.5). KITTI examples: 0.5 (Car Easy/Mod), 0.7 (Car Hard), 0.5 (Ped/Cyc Easy/Mod), 0.5 (Ped/Cyc Hard).') - parser.add_argument('--classes', type=str, nargs='*', default=['Car', 'Pedestrian', 'Cyclist'], help='List of classes to evaluate (default: KITTI common classes). Case sensitive.') - parser.add_argument('--file_extension', type=str, default='.txt', help='Extension of the label files (default: .txt).') - parser.add_argument('--confidence_thresholds', type=float, nargs='*', default=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], - help='Confidence thresholds for evaluation (default: 0.0-0.9 in 0.1 increments).') - - args = parser.parse_args() - - ######## Argument Validation and Setup - if not os.path.isdir(args.gt_dir): - print(f"[LOG] Error: Ground truth directory not found: {args.gt_dir}") - return - # Check prediction directories *before* loading GT - valid_pred_dirs = [] - original_detector_names = args.detector_names if args.detector_names else [os.path.basename(d.rstrip('/\\')) for d in args.pred_dirs] - final_detector_names = [] - pred_dir_map = {} # Map name back to directory path - - for i, d in enumerate(args.pred_dirs): - detector_name = original_detector_names[i] - if not os.path.isdir(d): - print(f"[LOG] Prediction directory not found: {d}. Skipping detector '{detector_name}'.") - else: - valid_pred_dirs.append(d) - final_detector_names.append(detector_name) - pred_dir_map[detector_name] = d - - if not valid_pred_dirs: - print("[LOG] Error: No valid prediction directories provided.") - return - - # Ensure output directory exists - try: - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - print(f"[LOG] Created output directory: {args.output_dir}") - except OSError as e: - print(f"[LOG] Error creating output directory {args.output_dir}: {e}") - return - - print("[LOG] Configuration") - print(f"Ground Truth Dir: {args.gt_dir}") - print(f"Prediction Dirs: {valid_pred_dirs}") # Show only valid dirs being used - print(f"Detector Names: {final_detector_names}") # Show corresponding names - print(f"Output Dir: {args.output_dir}") - print(f"IoU Threshold: {args.iou_threshold}") - print(f"Classes: {args.classes}") - print(f"File Extension: {args.file_extension}") - print(f"Confidence Thresholds: {args.confidence_thresholds}") - print("=====================\n") - - ######## Load Data - print("[LOG] Loading data...") - try: - gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(args.file_extension)]) - if not gt_files: - print(f"[LOG] No ground truth files found in {args.gt_dir} with extension {args.file_extension}. 
Cannot evaluate.") - return - - print(f"Found {len(gt_files)} ground truth files.") - gt_boxes_all_samples = {} # {sample_id: {class: [boxes]}} - # Load GT data first - print("Loading Ground Truth...") - for filename in gt_files: - sample_id = os.path.splitext(filename)[0] - gt_file_path = os.path.join(args.gt_dir, filename) - gt_boxes_all_samples[sample_id] = load_labels(gt_file_path, is_gt=True) - print("Ground Truth loaded.") - - # Load prediction data for valid detectors - pred_boxes_all_detectors = {} # {detector_name: {sample_id: {class: [boxes]}}} - print("Loading Predictions...") - for detector_name in final_detector_names: - pred_dir = pred_dir_map[detector_name] # Get path from map - print(f" Loading from {detector_name} ({pred_dir})...") - pred_boxes_all_detectors[detector_name] = {} - files_found_count = 0 - for filename in gt_files: # Iterate based on GT files for consistency - sample_id = os.path.splitext(filename)[0] - pred_file_path = os.path.join(pred_dir, filename) - # load_labels handles non-existent files, returns empty dict - pred_boxes_all_detectors[detector_name][sample_id] = load_labels(pred_file_path, is_gt=False) - if os.path.exists(pred_file_path): - files_found_count += 1 - print(f" Found {files_found_count} matching prediction files for {detector_name}.") - print("Predictions loaded.") - - print(f"\n[LOG] Loaded data for {len(gt_files)} samples and {len(final_detector_names)} detector(s).\n") - - except Exception as e: - print(f"[LOG] Error during data loading: {e}") - traceback.print_exc() - return - - ######## Evaluation - print("[LOG] Evaluating detectors...") - results_all_detectors = {} # {detector_name: {class: {'ap':, 'p':, 'r':, 'gt':, 'pred':}}} - try: - for detector_name in final_detector_names: # Use the filtered list of names - print(f" Evaluating: {detector_name}") - # Check if prediction data was actually loaded for this detector - if detector_name not in pred_boxes_all_detectors: - print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") - results_all_detectors[detector_name] = {} # Store empty results - continue - - pred_boxes_all_samples = pred_boxes_all_detectors[detector_name] - - if not gt_boxes_all_samples: # Redundant check, but safe - print(f" Skipping {detector_name} - No ground truth data loaded.") - results_all_detectors[detector_name] = {} - continue - - # Perform evaluation - results_by_class = evaluate_detector( - gt_boxes_all_samples, - pred_boxes_all_samples, - args.classes, # Pass the user-specified classes - args.iou_threshold, - args.confidence_thresholds - ) - results_all_detectors[detector_name] = results_by_class - print("[LOG] Evaluation complete.\n") - except Exception as e: - print(f"[LOG] Error during evaluation: {e}") - traceback.print_exc() - return # Stop execution on evaluation error - - - ######## Report Results & Save - print("[LOG] Results") - results_file_path = os.path.join(args.output_dir, 'evaluation_metrics.txt') - - try: - with open(results_file_path, 'w') as f: - f.write(f"Evaluation Results (IoU Threshold: {args.iou_threshold})\n") - f.write(f"Evaluated Classes: {', '.join(args.classes)}\n") - f.write(f"Confidence Thresholds: {args.confidence_thresholds}\n") - f.write("="*60 + "\n") - - overall_mAPs = {} # Store mAP for each detector {detector_name: mAP_value} - - # Process results for each detector that was evaluated - for detector_name in final_detector_names: - # Check if results exist for this detector (might be skipped if loading failed) - if detector_name not in 
results_all_detectors: - print(f"\nDetector: {detector_name} (SKIPPED - No results)") - f.write(f"\nDetector: {detector_name} (SKIPPED - No results)\n") - continue # Skip to the next detector - - results_by_class = results_all_detectors[detector_name] - print(f"\nDetector: {detector_name}") - f.write(f"\nDetector: {detector_name}\n") - f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") - f.write("-" * 85 + "\n") - - detector_aps = [] # AP values for classes with GT > 0 for this detector - evaluated_classes_with_gt = [] # Class names with GT > 0 for this detector - - # Iterate through the classes specified by the user for consistent reporting - for cls in args.classes: - # Check if GT data exists for this class across all samples - num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) - - if cls in results_by_class: - # Results exist for this class and detector - res = results_by_class[cls] - ap = res['ap'] - num_gt = res['num_gt'] # Should match num_gt_total_for_class if evaluated correctly - num_pred = res['num_pred'] # Number of valid predictions for this class - best_threshold = res.get('best_threshold', 0.0) - - # Get confusion matrix from best threshold - cm = res.get('confusion_matrix', {}) - tp = cm.get('TP', 0) - fp = cm.get('FP', 0) - fn = cm.get('FN', 0) - - print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}") - f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}\n") - - # Include in mAP calculation only if there were GT boxes for this class - if num_gt > 0: - detector_aps.append(ap) - evaluated_classes_with_gt.append(cls) - # If num_gt is 0 for a class, its AP is 0 or NaN, not included in mAP. - else: - # No results entry for this class, implies 0 valid predictions processed. - # Report AP as 0.0000 if GT existed, otherwise N/A. - print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}") - f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}\n") - # If GT existed, this class contributes 0 to the mAP average. 
- if num_gt_total_for_class > 0: - detector_aps.append(0.0) - evaluated_classes_with_gt.append(cls) - - # Calculate and report mAP for this detector based on evaluated classes with GT - if detector_aps: # Check if list is not empty - mAP = np.mean(detector_aps) - overall_mAPs[detector_name] = mAP - # Report which classes contributed to the mAP - mAP_info = f"(Classes w/ GT: {', '.join(evaluated_classes_with_gt)})" if evaluated_classes_with_gt else "" - print("-" * 85) - print(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}") - f.write("-" * 85 + "\n") - f.write(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}\n") - - # Add confusion matrix section for this detector - f.write("\nConfusion Matrix Summary (at best threshold per class):\n") - f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") - f.write("-" * 75 + "\n") - - for cls in evaluated_classes_with_gt: - if cls in results_by_class: - res = results_by_class[cls] - best_thresh = res.get('best_threshold', 0.0) - cm = res.get('confusion_matrix', {}) - - tp = cm.get('TP', 0) - fp = cm.get('FP', 0) - fn = cm.get('FN', 0) - - precision = tp / (tp + fp) if (tp + fp) > 0 else 0 - recall = tp / (tp + fn) if (tp + fn) > 0 else 0 - - f.write(f"{cls:<15} | {best_thresh:<10.3f} | {tp:<5} | {fp:<5} | {fn:<5} | {precision:<10.4f} | {recall:<10.4f}\n") - - else: - overall_mAPs[detector_name] = np.nan # Indicate mAP couldn't be computed - print("-" * 85) - print(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}") - f.write("-" * 85 + "\n") - f.write(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}\n") - - - ######## Overall Comparison Table - # Check if there are results to compare - if len(final_detector_names) > 0 and results_all_detectors: - print("\n[LOG] Overall Class AP Comparison") - f.write("\n" + "="*60 + "\n") - f.write("Overall Class AP Comparison\n") - f.write("="*60 + "\n") - # Truncate long detector names for table header - header_names = [name[:12] for name in final_detector_names] # Limit name length - header = f"{'Class':<15}" + "".join([f" | {name:<12}" for name in header_names]) - print(header) - f.write(header + "\n") - print("-" * len(header)) - f.write("-" * len(header) + "\n") - - # Iterate through user-specified classes for the table rows - for cls in args.classes: - aps_str = "" - num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) - - # Get AP for each detector for this class - for detector_name in final_detector_names: - # Retrieve AP using .get chains safely - ap_val = results_all_detectors.get(detector_name, {}).get(cls, {}).get('ap', np.nan) - - # Decide display value based on AP and total GT count - if np.isnan(ap_val): - # Show 0.0000 if GT existed, else N/A - display_val = f"{0.0:<12.4f}" if num_gt_total_for_class > 0 else f"{'N/A':<12}" - else: - # Format valid AP value - display_val = f"{ap_val:<12.4f}" - aps_str += f" | {display_val}" - - line = f"{cls:<15}" + aps_str - print(line) - f.write(line + "\n") - - ######## Overall mAP Comparison - print("-" * len(header)) - f.write("-" * len(header) + "\n") - map_str = "" - for detector_name in final_detector_names: - map_val = overall_mAPs.get(detector_name, np.nan) # Get calculated mAP - map_str += f" | {map_val:<12.4f}" if not np.isnan(map_val) else f" | {'N/A':<12}" - map_line = f"{'mAP':<15}" + map_str - print(map_line) - f.write(map_line + "\n") - else: - print("\n[LOG] Skipping overall comparison table - no results available.") - f.write("\n[LOG] 
Skipping overall comparison table - no results available.\n") - - print(f"\nMetrics saved to: {results_file_path}") - - except Exception as e: - print(f"[LOG] Error during results reporting/saving: {e}") - traceback.print_exc() - # Plot - # return # Or stop - - ########## Export Confusion Matrix CSV - try: - export_confusion_matrix(results_all_detectors, args.output_dir, args.classes) - except Exception as e: - print(f"[LOG] Error exporting confusion matrix: {e}") - traceback.print_exc() - - ########## Plotting - print("\n[LOG] Generating PR curves...") - try: - # Check if results_all_detectors has data before plotting - if results_all_detectors and any(results_all_detectors.values()): - # Pass only valid detector results to plotting function - # Filter out detectors that were skipped or had errors resulting in no results dict - valid_results = {k: v for k, v in results_all_detectors.items() if isinstance(v, dict) and k in final_detector_names} - if valid_results: - plot_pr_curves(valid_results, args.classes, args.output_dir) - print(f"[LOG] PR curve plots potentially saved in: {args.output_dir}") - else: - print("[LOG] Skipping plotting - no valid evaluation results were generated.") - else: - print("[LOG] Skipping plotting - no evaluation results generated.") - - except Exception as e: - print(f"Error during plotting: {e}") - traceback.print_exc() - - print("\n[LOG] Evaluation finished.") - - -if __name__ == '__main__': - main() +import argparse +import os +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from collections import defaultdict +import math +import warnings +import traceback +import csv + +######## Bbox coord mapping +IDX_X, IDX_Y, IDX_Z = 0, 1, 2 +IDX_L, IDX_W, IDX_H = 3, 4, 5 +IDX_YAW = 6 +IDX_CLASS = 7 +IDX_SCORE = 8 + +######## Helper Functions + +def parse_box_line(line, is_gt=False): + """ + Parses a line from a KITTI format label file. + Handles the conversion from KITTI's y-coordinate (bottom of box) + to the geometric center y used internally. + """ + parts = line.strip().split() + try: + # Parse KITTI format: + # type truncated occluded alpha bbox_2d(4) dims(3) loc(3) rotation_y (score) + # 0 1 2 3 4-7 8-10 11-13 14 (15) + + # Check minimum length for standard KITTI format (15 fields without score) + if len(parts) < 15: + raise ValueError(f"Line does not have enough parts for KITTI format (expected >= 15, got {len(parts)})") + + class_label = parts[0] + + # Read dimensions (h, w, l) from KITTI standard indices 8, 9, 10 + h = float(parts[8]) + w = float(parts[9]) + l = float(parts[10]) + + # Read location (x, y_bottom, z) from KITTI standard indices 11, 12, 13 + loc_x = float(parts[11]) + loc_y_bottom = float(parts[12]) # KITTI 'y' is bottom of the box in camera coords + loc_z = float(parts[13]) + + # Read rotation_y (yaw) from KITTI standard index 14 + rot_y = float(parts[14]) + + # Convert to internal format + # Internal format: [cx, cy, cz, l, w, h, yaw, class_label, score] + + # Use KITTI loc_x and loc_z directly for internal cx, cz + cx = loc_x + cz = loc_z + + # Convert KITTI y (bottom of box) to geometric center y for internal use. 
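+        # Worked example with made-up numbers: h = 1.8, loc_y_bottom = 1.6 yields cy = 0.7 (see the formula below).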
+        # In KITTI camera coords, Y points down, so center is 'above' bottom (numerically smaller y): loc_y - h / 2
+        cy = loc_y_bottom - (h / 2.0)
+
+        # Use KITTI rot_y directly for internal yaw
+        yaw = rot_y
+
+        # Create the standardized box list [cx, cy, cz, l, w, h, yaw, class_label]
+        # Note the internal dimension order (l, w, h) differs from KITTI input order (h, w, l)
+        box = [cx, cy, cz, l, w, h, yaw, class_label]
+
+        # Append score if it's a prediction file (standard KITTI index 15)
+        if not is_gt:
+            if len(parts) > 15:
+                score = float(parts[15])
+            else:
+                # Handle missing score in prediction file if needed
+                score = 1.0  # Assigning default 1.0 as before
+                warnings.warn(f"[LOG] Prediction line missing score (field 16), assigning default 1.0: {line.strip()}")
+            box.append(score)  # Append score using IDX_SCORE implicitly
+
+        return box
+
+    except ValueError as ve:
+        print(f"[LOG] Error parsing line: '{line.strip()}' - {ve}")
+        print("[LOG] Does the input follow KITTI format?")
+        return None
+    except Exception as e:
+        print(f"[LOG] Unexpected error parsing line: '{line.strip()}' - {e}")
+        traceback.print_exc()  # Print detailed traceback for unexpected errors
+        return None
+
+
+def load_labels(file_path, is_gt=False):
+    """Loads ground truth or prediction boxes from a KITTI-format file."""
+    boxes = defaultdict(list)
+    if not os.path.exists(file_path):
+        # Reduced verbosity for missing prediction files (common)
+        # print(f"[LOG] File not found {file_path}, returning empty dict.")
+        return boxes
+    try:
+        with open(file_path, 'r') as f:
+            for i, line in enumerate(f):
+                box = parse_box_line(line, is_gt=is_gt)
+                if box:
+                    class_label = box[IDX_CLASS]
+                    boxes[class_label].append(box)
+                else:
+                    print(f"[LOG] Skipping invalid line {i+1} in {file_path}")
+    except Exception as e:
+        print(f"[LOG] Error reading file {file_path}: {e}")
+        traceback.print_exc()
+    return boxes
+
+def get_aabb_corners(box):
+    # Uses internal format where cy is geometric center
+    cx, cy, cz, l, w, h = box[IDX_X], box[IDX_Y], box[IDX_Z], box[IDX_L], box[IDX_W], box[IDX_H]
+    # Ignores yaw
+    min_x, max_x = cx - l / 2, cx + l / 2
+    min_y, max_y = cy - h / 2, cy + h / 2  # Uses geometric center cy
+    min_z, max_z = cz - w / 2, cz + w / 2  # Assuming cz is center z (depth)
+    return min_x, max_x, min_y, max_y, min_z, max_z
+
+def get_volume(box):
+    return box[IDX_L] * box[IDX_W] * box[IDX_H]
+
+def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb):
+    """
+    Calculates the 3D Intersection over Union (IoU) between two bounding boxes.
+
+    Args:
+        box1, box2: List or tuple representing a 3D bounding box in the
+                    *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...]
+                    where cy is the geometric center y.
+        get_corners_cb: Callback function to extract AABB corners from a box
+        get_volume_cb: Callback function to calculate volume of a box
+
+    Returns:
+        float: The 3D IoU value.
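+        Worked example (toy numbers, not from the codebase): two unit cubes
+        (l = w = h = 1) whose centers are offset by 0.5 along x share half a
+        cube of volume, so IoU = 0.5 / (1 + 1 - 0.5) = 1/3.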
+
+    Note: yaw is ignored; boxes are compared as axis-aligned (AABB) volumes.
+    """
+
+    ######## Simple Axis-Aligned Bounding Box (AABB) IoU
+    min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1)
+    min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2)
+
+    # Calculate intersection volume
+    inter_min_x = max(min_x1, min_x2)
+    inter_max_x = min(max_x1, max_x2)
+    inter_min_y = max(min_y1, min_y2)
+    inter_max_y = min(max_y1, max_y2)
+    inter_min_z = max(min_z1, min_z2)
+    inter_max_z = min(max_z1, max_z2)
+
+    inter_l = max(0, inter_max_x - inter_min_x)
+    inter_h = max(0, inter_max_y - inter_min_y)
+    inter_w = max(0, inter_max_z - inter_min_z)
+    intersection_volume = inter_l * inter_h * inter_w
+
+    # Calculate union volume
+    vol1 = get_volume_cb(box1)
+    vol2 = get_volume_cb(box2)
+    # Ensure volumes are positive and non-zero for stable IoU
+    vol1 = max(vol1, 1e-6)
+    vol2 = max(vol2, 1e-6)
+    union_volume = vol1 + vol2 - intersection_volume
+    union_volume = max(union_volume, 1e-6)  # Avoid division by zero or very small numbers
+
+    iou = intersection_volume / union_volume
+    # Clamp IoU to [0, 1] range
+    iou = max(0.0, min(iou, 1.0))
+    return iou
+
+
+def calculate_ap(precision, recall):
+    """Calculates Average Precision using the PASCAL VOC method (area under the monotonic PR curve)."""
+    # Convert to numpy arrays first for safety
+    if not isinstance(precision, (list, np.ndarray)) or not isinstance(recall, (list, np.ndarray)):
+        return 0.0
+    precision = np.array(precision)
+    recall = np.array(recall)
+
+    if precision.size == 0 or recall.size == 0:
+        return 0.0
+
+    # Prepend/append points for interpolation boundaries
+    recall = np.concatenate(([0.], recall, [1.0]))
+    precision = np.concatenate(([0.], precision, [0.]))  # Start with 0 precision at recall 0, end with 0 at recall 1
+
+    # Make precision monotonically decreasing (handles PR curve 'jiggles')
+    for i in range(len(precision) - 2, -1, -1):
+        precision[i] = max(precision[i], precision[i+1])
+
+    # Find indices where recall changes (avoids redundant calculations)
+    indices = np.where(recall[1:] != recall[:-1])[0] + 1
+
+    # Compute AP using the area under the curve (sum of rectangle areas)
+    ap = np.sum((recall[indices] - recall[indices-1]) * precision[indices])
+    return ap
+
+def evaluate_detector(gt_boxes_all_samples, pred_boxes_all_samples, classes, iou_threshold, confidence_thresholds=None):
+    """Evaluates a single detector's predictions against ground truth.
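+    Predictions are pooled across samples per class, sorted by confidence,
+    and greedily matched to ground truth using axis-aligned 3D IoU.
+
+    Example with hypothetical toy data (internal box format; score appended
+    to prediction boxes):
+        >>> gts = {'000001': {'Car': [[0.0, 0.0, 10.0, 4.0, 2.0, 1.5, 0.0, 'Car']]}}
+        >>> preds = {'000001': {'Car': [[0.1, 0.0, 10.0, 4.0, 2.0, 1.5, 0.0, 'Car', 0.9]]}}
+        >>> res = evaluate_detector(gts, preds, ['Car'], iou_threshold=0.5)
+        >>> res['Car']['num_gt'], res['Car']['num_pred'], res['Car']['confusion_matrix']['TP']
+        (1, 1, 1)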
+ + Args: + gt_boxes_all_samples: Ground truth boxes dictionary by sample_id and class + pred_boxes_all_samples: Prediction boxes dictionary by sample_id and class + classes: List of classes to evaluate + iou_threshold: IoU threshold for matching predictions to ground truth + confidence_thresholds: List of confidence thresholds to use for evaluation + If None, uses [0.0] (all predictions) + + Returns: + Dictionary of results by class + """ + # Default to single threshold that includes all predictions + if confidence_thresholds is None: + confidence_thresholds = [0.0] + + results_by_class = {} + sample_ids = list(gt_boxes_all_samples.keys()) # Get fixed order of sample IDs + + for cls in classes: + all_pred_boxes_cls = [] + num_gt_cls = 0 + pred_sample_indices = [] # Store index from sample_ids for each prediction + pred_scores = [] # Store confidence scores for each prediction + + # Collect all GTs and Preds for this class across samples + for i, sample_id in enumerate(sample_ids): + # Use .get() with default empty dict/list for safety + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + pred_boxes = pred_boxes_all_samples.get(sample_id, {}).get(cls, []) + + num_gt_cls += len(gt_boxes) + for box in pred_boxes: + all_pred_boxes_cls.append(box) + pred_sample_indices.append(i) # Store the original sample index + # Extract score safely + if len(box) > IDX_SCORE and isinstance(box[IDX_SCORE], (int, float)): + pred_scores.append(box[IDX_SCORE]) + else: + pred_scores.append(0.0) # Default score if missing + + if not all_pred_boxes_cls: # Handle case with no predictions for this class + results_by_class[cls] = { + 'precision': np.array([]), # Use empty numpy arrays + 'recall': np.array([]), + 'ap': 0.0, + 'num_gt': num_gt_cls, + 'num_pred': 0, + 'confusion_matrix': {}, # Empty confusion matrix + 'thresholds': confidence_thresholds + } + continue # Skip to next class + + # Sort all predictions by score (descending) + sorted_indices = np.argsort(pred_scores)[::-1] + + # Reorder the lists based on sorted scores + all_pred_boxes_cls = [all_pred_boxes_cls[i] for i in sorted_indices] + pred_sample_indices = [pred_sample_indices[i] for i in sorted_indices] + pred_scores = [pred_scores[i] for i in sorted_indices] + + # Store results for each confidence threshold + threshold_results = {} + + # For each confidence threshold + for threshold in confidence_thresholds: + # Filter predictions above threshold + threshold_indices = [i for i, score in enumerate(pred_scores) if score >= threshold] + if not threshold_indices: + # No predictions above this threshold + threshold_results[threshold] = { + 'precision': np.array([]), + 'recall': np.array([]), + 'ap': 0.0, + 'tp': np.array([]), + 'fp': np.array([]), + 'tn': 0, # TN doesn't apply directly to object detection (no explicit negatives) + 'fn': num_gt_cls, # All GT boxes are missed + 'confusion_matrix': { + 'TP': 0, + 'FP': 0, + 'FN': num_gt_cls, + 'TN': 0 # Included for completeness + } + } + continue + + # Get predictions above this threshold + filtered_pred_boxes = [all_pred_boxes_cls[i] for i in threshold_indices] + filtered_sample_indices = [pred_sample_indices[i] for i in threshold_indices] + filtered_scores = [pred_scores[i] for i in threshold_indices] + + tp = np.zeros(len(filtered_pred_boxes)) + fp = np.zeros(len(filtered_pred_boxes)) + # Track matched GTs per sample: gt_matched[sample_idx][gt_box_idx] = True/False + gt_matched = defaultdict(lambda: defaultdict(bool)) # Indexed by sample_idx, then gt_idx + + # Match predictions + for 
det_idx, pred_box in enumerate(filtered_pred_boxes): + sample_idx = filtered_sample_indices[det_idx] + sample_id = sample_ids[sample_idx] + gt_boxes = gt_boxes_all_samples.get(sample_id, {}).get(cls, []) + + best_iou = -1.0 + best_gt_idx = -1 + + if not gt_boxes: # No GT for this class in this specific sample + fp[det_idx] = 1 + continue + + for gt_idx, gt_box in enumerate(gt_boxes): + # Explicitly check class match (belt-and-suspenders) + if pred_box[IDX_CLASS] == gt_box[IDX_CLASS]: + iou = calculate_3d_iou(pred_box, gt_box, get_aabb_corners, get_volume) + if iou > best_iou: + best_iou = iou + best_gt_idx = gt_idx + + if best_iou >= iou_threshold: + # Check if this GT box was already matched *in this sample* + if not gt_matched[sample_idx].get(best_gt_idx, False): + tp[det_idx] = 1 + gt_matched[sample_idx][best_gt_idx] = True # Mark as matched + else: + fp[det_idx] = 1 # Already matched by higher-scored prediction + else: + fp[det_idx] = 1 # No match with sufficient IoU + + # Calculate confusion matrix metrics + total_tp = int(np.sum(tp)) + total_fp = int(np.sum(fp)) + + # Count false negatives (GT boxes that weren't matched) + total_fn = num_gt_cls - sum(len(matched) for matched in gt_matched.values()) + # True negatives don't apply well to object detection, but included for completeness + total_tn = 0 # Placeholder + + # Calculate precision/recall + fp_cumsum = np.cumsum(fp) + tp_cumsum = np.cumsum(tp) + + # Avoid division by zero if num_gt_cls is 0 + recall = tp_cumsum / num_gt_cls if num_gt_cls > 0 else np.zeros_like(tp_cumsum, dtype=float) + + # Avoid division by zero if no predictions were made or matched (tp + fp = 0) + denominator = tp_cumsum + fp_cumsum + precision = np.divide(tp_cumsum, denominator, out=np.zeros_like(tp_cumsum, dtype=float), where=denominator!=0) + + ap = calculate_ap(precision, recall) + + # Store results for this threshold + threshold_results[threshold] = { + 'precision': precision, + 'recall': recall, + 'ap': ap, + 'tp': tp, + 'fp': fp, + 'fn': total_fn, + 'tn': total_tn, + 'confusion_matrix': { + 'TP': total_tp, + 'FP': total_fp, + 'FN': total_fn, + 'TN': total_tn + }, + 'scores': filtered_scores + } + + # Find best AP score across thresholds + best_threshold = max(threshold_results.keys(), + key=lambda t: threshold_results[t]['ap'] if not np.isnan(threshold_results[t]['ap']) else 0.0) + best_result = threshold_results[best_threshold] + + # Store overall results for this class + results_by_class[cls] = { + 'precision': best_result['precision'], + 'recall': best_result['recall'], + 'ap': best_result['ap'], + 'num_gt': num_gt_cls, + 'num_pred': len(all_pred_boxes_cls), + 'best_threshold': best_threshold, + 'thresholds': confidence_thresholds, + 'threshold_results': threshold_results, + 'confusion_matrix': best_result['confusion_matrix'] + } + + return results_by_class + + +def plot_pr_curves(results_all_detectors, classes, output_dir): + """ + Plots Precision-Recall curves for each class. + + Creates: + 1. Overall PR curves comparing all detectors at their best threshold + 2. 
PR curves for each detector showing performance at different confidence thresholds + """ + if not os.path.exists(output_dir): + try: + os.makedirs(output_dir) + except OSError as e: + print(f"[LOG] Error creating output directory {output_dir} for plots: {e}") + return # Cannot save plots + + pr_curves_dir = os.path.join(output_dir, 'pr_curves') + if not os.path.exists(pr_curves_dir): + try: + os.makedirs(pr_curves_dir) + except OSError as e: + print(f"[LOG] Error creating PR curves directory {pr_curves_dir}: {e}") + return + + detector_names = list(results_all_detectors.keys()) + + # Overall PR curves comparing detectors + for cls in classes: + plt.figure(figsize=(10, 7)) + any_results_for_class = False # Track if any detector had results for this class + + for detector_name, results_by_class in results_all_detectors.items(): + if cls in results_by_class and results_by_class[cls]['num_pred'] > 0: # Check if there were predictions + res = results_by_class[cls] + precision = res['precision'] + recall = res['recall'] + ap = res['ap'] + best_threshold = res.get('best_threshold', 0.0) + + # Ensure plotting works even if precision/recall are empty arrays + if recall.size > 0 and precision.size > 0: + # Prepend a point for plotting nicely from recall=0 + plot_recall = np.concatenate(([0.], recall)) + # Use precision[0] if available, else 0. + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + plt.plot(plot_recall, plot_precision, marker='.', markersize=4, linestyle='-', + label=f'{detector_name} (AP={ap:.3f}, t={best_threshold:.2f})') + any_results_for_class = True + else: # Handle case where num_pred > 0 but P/R arrays somehow ended up empty + plt.plot([0], [0], marker='s', markersize=5, linestyle='', + label=f'{detector_name} (AP={ap:.3f}, No P/R data?)') + any_results_for_class = True # Still mark as having results + + elif cls in results_by_class: # Class exists in evaluation, but no predictions were made for it + num_gt = results_by_class[cls]['num_gt'] + if num_gt > 0: + # Plot a marker indicating no predictions were made for this GT class + plt.plot([0], [0], marker='x', markersize=6, linestyle='', + label=f'{detector_name} (No Pred, GT={num_gt})') + + if any_results_for_class: + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'Precision-Recall Curve for Class: {cls}') + plt.legend(loc='lower left', fontsize='small') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + plot_path = os.path.join(pr_curves_dir, f'pr_curve_{cls}_overview.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated PR curve overview: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving PR curve plot for class '{cls}': {e}") + finally: + plt.close() # Close the figure regardless of save success + else: + # Check if there was any GT data for this class across all detectors + num_gt_total = sum(results_by_class.get(cls, {}).get('num_gt', 0) for results_by_class in results_all_detectors.values()) + if num_gt_total > 0: + print(f" Skipping PR plot for class '{cls}': No predictions found across detectors (GT={num_gt_total}).") + else: + print(f" Skipping PR plot for class '{cls}': No ground truth found.") + plt.close() # Close the empty figure + + # Plots showing PR curves at different thresholds for each detector and class + for detector_name, results_by_class in results_all_detectors.items(): + detector_dir = os.path.join(pr_curves_dir, detector_name) + if not os.path.exists(detector_dir): + try: + os.makedirs(detector_dir) + 
except OSError as e: + print(f"[LOG] Error creating directory for {detector_name}: {e}") + continue + + for cls in classes: + if cls not in results_by_class or results_by_class[cls]['num_pred'] == 0: + continue + + res = results_by_class[cls] + threshold_results = res.get('threshold_results', {}) + + if not threshold_results: + continue + + plt.figure(figsize=(10, 7)) + + # Plot PR curve for each threshold + thresholds = sorted(threshold_results.keys()) + for threshold in thresholds: + threshold_res = threshold_results[threshold] + + # Skip thresholds with no predictions + if 'precision' not in threshold_res or len(threshold_res['precision']) == 0: + continue + + precision = threshold_res['precision'] + recall = threshold_res['recall'] + ap = threshold_res['ap'] + + # Get TP/FP counts for label + cm = threshold_res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + + # Mark best threshold with a different line style + is_best = threshold == res.get('best_threshold', 0.0) + linestyle = '-' if is_best else '--' + linewidth = 2 if is_best else 1 + + # Prepend points for better visualization + plot_recall = np.concatenate(([0.], recall)) + plot_precision = np.concatenate(([precision[0] if precision.size > 0 else 0.], precision)) + + plt.plot(plot_recall, plot_precision, marker='.', markersize=3, + linestyle=linestyle, linewidth=linewidth, + label=f'thresh={threshold:.2f} (AP={ap:.3f}, TP={tp}, FP={fp})') + + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.title(f'{detector_name}: PR Curves at Different Thresholds for {cls}') + plt.legend(loc='lower left', fontsize='small') + plt.grid(True) + plt.xlim([-0.05, 1.05]) + plt.ylim([-0.05, 1.05]) + + plot_path = os.path.join(detector_dir, f'pr_curve_{cls}_thresholds.png') + try: + plt.savefig(plot_path) + print(f"[LOG] Generated threshold PR curves for {detector_name}/{cls}: {plot_path}") + except Exception as e: + print(f"[LOG] Error saving threshold PR curves for {detector_name}/{cls}: {e}") + finally: + plt.close() + +def export_confusion_matrix(results_all_detectors, output_dir, classes): + """ + Exports confusion matrix metrics for all detectors and classes to CSV files. 
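+    Writes one summary CSV covering all detectors, plus a detailed
+    per-detector CSV with metrics at every confidence threshold.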
+ + Args: + results_all_detectors: Dictionary of results by detector and class + output_dir: Directory to save results + classes: List of classes to evaluate + """ + confusion_dir = os.path.join(output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Create a summary file for all detectors + summary_path = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_path, 'w', newline='') as f: + writer = csv.writer(f) + # Write header + writer.writerow(['Detector', 'Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + # Write data for each detector, class, and threshold + for detector_name, results_by_class in results_all_detectors.items(): + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + best_threshold = cls_results.get('best_threshold', 0.0) + threshold_results = cls_results.get('threshold_results', {}) + + for threshold, result in threshold_results.items(): + # Skip if no predictions at this threshold + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + tp = cm['TP'] + fp = cm['FP'] + fn = cm['FN'] + tn = cm['TN'] + + # Calculate precision and recall + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + # Write row + is_best = '* ' if threshold == best_threshold else '' + writer.writerow([ + detector_name, + cls, + f"{is_best}{threshold:.3f}", + tp, + fp, + fn, + tn, + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Confusion matrix metrics saved to: {summary_path}") + + # Create detailed files for each detector with per-threshold metrics + for detector_name, results_by_class in results_all_detectors.items(): + detector_file = os.path.join(confusion_dir, f'{detector_name}_detailed.csv') + with open(detector_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['Class', 'Threshold', 'TP', 'FP', 'FN', 'TN', 'Precision', 'Recall', 'AP']) + + for cls in classes: + if cls in results_by_class: + cls_results = results_by_class[cls] + threshold_results = cls_results.get('threshold_results', {}) + + # Sort thresholds for better readability + thresholds = sorted(threshold_results.keys()) + + for threshold in thresholds: + result = threshold_results[threshold] + if 'confusion_matrix' not in result: + continue + + cm = result['confusion_matrix'] + precision = cm['TP'] / (cm['TP'] + cm['FP']) if (cm['TP'] + cm['FP']) > 0 else 0 + recall = cm['TP'] / (cm['TP'] + cm['FN']) if (cm['TP'] + cm['FN']) > 0 else 0 + + writer.writerow([ + cls, + f"{threshold:.3f}", + cm['TP'], + cm['FP'], + cm['FN'], + cm['TN'], + f"{precision:.4f}", + f"{recall:.4f}", + f"{result['ap']:.4f}" + ]) + + print(f"[LOG] Detailed metrics for {detector_name} saved to: {detector_file}") + +def main(): + parser = argparse.ArgumentParser(description='Evaluate N 3D Object Detectors using KITTI format labels.') + parser.add_argument('gt_dir', type=str, help='Directory containing ground truth label files (KITTI format).') + parser.add_argument('pred_dirs', type=str, nargs='+', help='List of directories containing prediction files (KITTI format).') + parser.add_argument('output_dir', type=str, help='Directory to save evaluation results.') + parser.add_argument('--detector_names', type=str, nargs='*', help='Optional names for the detectors (corresponding to pred_dirs). 
If not provided, uses directory names.') + parser.add_argument('--iou_threshold', type=float, default=0.5, help='3D IoU threshold for matching (default: 0.5). KITTI examples: 0.5 (Car Easy/Mod), 0.7 (Car Hard), 0.5 (Ped/Cyc Easy/Mod), 0.5 (Ped/Cyc Hard).') + parser.add_argument('--classes', type=str, nargs='*', default=['Car', 'Pedestrian', 'Cyclist'], help='List of classes to evaluate (default: KITTI common classes). Case sensitive.') + parser.add_argument('--file_extension', type=str, default='.txt', help='Extension of the label files (default: .txt).') + parser.add_argument('--confidence_thresholds', type=float, nargs='*', default=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], + help='Confidence thresholds for evaluation (default: 0.0-0.9 in 0.1 increments).') + + args = parser.parse_args() + + ######## Argument Validation and Setup + if not os.path.isdir(args.gt_dir): + print(f"[LOG] Error: Ground truth directory not found: {args.gt_dir}") + return + # Check prediction directories *before* loading GT + valid_pred_dirs = [] + original_detector_names = args.detector_names if args.detector_names else [os.path.basename(d.rstrip('/\\')) for d in args.pred_dirs] + final_detector_names = [] + pred_dir_map = {} # Map name back to directory path + + for i, d in enumerate(args.pred_dirs): + detector_name = original_detector_names[i] + if not os.path.isdir(d): + print(f"[LOG] Prediction directory not found: {d}. Skipping detector '{detector_name}'.") + else: + valid_pred_dirs.append(d) + final_detector_names.append(detector_name) + pred_dir_map[detector_name] = d + + if not valid_pred_dirs: + print("[LOG] Error: No valid prediction directories provided.") + return + + # Ensure output directory exists + try: + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + print(f"[LOG] Created output directory: {args.output_dir}") + except OSError as e: + print(f"[LOG] Error creating output directory {args.output_dir}: {e}") + return + + print("[LOG] Configuration") + print(f"Ground Truth Dir: {args.gt_dir}") + print(f"Prediction Dirs: {valid_pred_dirs}") # Show only valid dirs being used + print(f"Detector Names: {final_detector_names}") # Show corresponding names + print(f"Output Dir: {args.output_dir}") + print(f"IoU Threshold: {args.iou_threshold}") + print(f"Classes: {args.classes}") + print(f"File Extension: {args.file_extension}") + print(f"Confidence Thresholds: {args.confidence_thresholds}") + print("=====================\n") + + ######## Load Data + print("[LOG] Loading data...") + try: + gt_files = sorted([f for f in os.listdir(args.gt_dir) if f.endswith(args.file_extension)]) + if not gt_files: + print(f"[LOG] No ground truth files found in {args.gt_dir} with extension {args.file_extension}. 
Cannot evaluate.") + return + + print(f"Found {len(gt_files)} ground truth files.") + gt_boxes_all_samples = {} # {sample_id: {class: [boxes]}} + # Load GT data first + print("Loading Ground Truth...") + for filename in gt_files: + sample_id = os.path.splitext(filename)[0] + gt_file_path = os.path.join(args.gt_dir, filename) + gt_boxes_all_samples[sample_id] = load_labels(gt_file_path, is_gt=True) + print("Ground Truth loaded.") + + # Load prediction data for valid detectors + pred_boxes_all_detectors = {} # {detector_name: {sample_id: {class: [boxes]}}} + print("Loading Predictions...") + for detector_name in final_detector_names: + pred_dir = pred_dir_map[detector_name] # Get path from map + print(f" Loading from {detector_name} ({pred_dir})...") + pred_boxes_all_detectors[detector_name] = {} + files_found_count = 0 + for filename in gt_files: # Iterate based on GT files for consistency + sample_id = os.path.splitext(filename)[0] + pred_file_path = os.path.join(pred_dir, filename) + # load_labels handles non-existent files, returns empty dict + pred_boxes_all_detectors[detector_name][sample_id] = load_labels(pred_file_path, is_gt=False) + if os.path.exists(pred_file_path): + files_found_count += 1 + print(f" Found {files_found_count} matching prediction files for {detector_name}.") + print("Predictions loaded.") + + print(f"\n[LOG] Loaded data for {len(gt_files)} samples and {len(final_detector_names)} detector(s).\n") + + except Exception as e: + print(f"[LOG] Error during data loading: {e}") + traceback.print_exc() + return + + ######## Evaluation + print("[LOG] Evaluating detectors...") + results_all_detectors = {} # {detector_name: {class: {'ap':, 'p':, 'r':, 'gt':, 'pred':}}} + try: + for detector_name in final_detector_names: # Use the filtered list of names + print(f" Evaluating: {detector_name}") + # Check if prediction data was actually loaded for this detector + if detector_name not in pred_boxes_all_detectors: + print(f" [LOG] Skipping {detector_name} - No prediction data loaded (check LOG)") + results_all_detectors[detector_name] = {} # Store empty results + continue + + pred_boxes_all_samples = pred_boxes_all_detectors[detector_name] + + if not gt_boxes_all_samples: # Redundant check, but safe + print(f" Skipping {detector_name} - No ground truth data loaded.") + results_all_detectors[detector_name] = {} + continue + + # Perform evaluation + results_by_class = evaluate_detector( + gt_boxes_all_samples, + pred_boxes_all_samples, + args.classes, # Pass the user-specified classes + args.iou_threshold, + args.confidence_thresholds + ) + results_all_detectors[detector_name] = results_by_class + print("[LOG] Evaluation complete.\n") + except Exception as e: + print(f"[LOG] Error during evaluation: {e}") + traceback.print_exc() + return # Stop execution on evaluation error + + + ######## Report Results & Save + print("[LOG] Results") + results_file_path = os.path.join(args.output_dir, 'evaluation_metrics.txt') + + try: + with open(results_file_path, 'w') as f: + f.write(f"Evaluation Results (IoU Threshold: {args.iou_threshold})\n") + f.write(f"Evaluated Classes: {', '.join(args.classes)}\n") + f.write(f"Confidence Thresholds: {args.confidence_thresholds}\n") + f.write("="*60 + "\n") + + overall_mAPs = {} # Store mAP for each detector {detector_name: mAP_value} + + # Process results for each detector that was evaluated + for detector_name in final_detector_names: + # Check if results exist for this detector (might be skipped if loading failed) + if detector_name not in 
results_all_detectors: + print(f"\nDetector: {detector_name} (SKIPPED - No results)") + f.write(f"\nDetector: {detector_name} (SKIPPED - No results)\n") + continue # Skip to the next detector + + results_by_class = results_all_detectors[detector_name] + print(f"\nDetector: {detector_name}") + f.write(f"\nDetector: {detector_name}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") + + detector_aps = [] # AP values for classes with GT > 0 for this detector + evaluated_classes_with_gt = [] # Class names with GT > 0 for this detector + + # Iterate through the classes specified by the user for consistent reporting + for cls in args.classes: + # Check if GT data exists for this class across all samples + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + if cls in results_by_class: + # Results exist for this class and detector + res = results_by_class[cls] + ap = res['ap'] + num_gt = res['num_gt'] # Should match num_gt_total_for_class if evaluated correctly + num_pred = res['num_pred'] # Number of valid predictions for this class + best_threshold = res.get('best_threshold', 0.0) + + # Get confusion matrix from best threshold + cm = res.get('confusion_matrix', {}) + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + print(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}") + f.write(f"{cls:<15} | {ap:<10.4f} | {num_gt:<10} | {num_pred:<10} | {best_threshold:<11.3f} | {tp:<5} | {fp:<5} | {fn:<5}\n") + + # Include in mAP calculation only if there were GT boxes for this class + if num_gt > 0: + detector_aps.append(ap) + evaluated_classes_with_gt.append(cls) + # If num_gt is 0 for a class, its AP is 0 or NaN, not included in mAP. + else: + # No results entry for this class, implies 0 valid predictions processed. + # Report AP as 0.0000 if GT existed, otherwise N/A. + print(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}") + f.write(f"{cls:<15} | {'0.0000' if num_gt_total_for_class > 0 else 'N/A':<10} | {num_gt_total_for_class:<10} | {'0':<10} | {'N/A':<11} | {'0':<5} | {'0':<5} | {num_gt_total_for_class:<5}\n") + # If GT existed, this class contributes 0 to the mAP average. 
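+                        # Counting this class as AP 0.0 (rather than skipping it) keeps the
+                        # mAP denominator identical across detectors.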
+ if num_gt_total_for_class > 0: + detector_aps.append(0.0) + evaluated_classes_with_gt.append(cls) + + # Calculate and report mAP for this detector based on evaluated classes with GT + if detector_aps: # Check if list is not empty + mAP = np.mean(detector_aps) + overall_mAPs[detector_name] = mAP + # Report which classes contributed to the mAP + mAP_info = f"(Classes w/ GT: {', '.join(evaluated_classes_with_gt)})" if evaluated_classes_with_gt else "" + print("-" * 85) + print(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}") + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {mAP:<10.4f} {mAP_info}\n") + + # Add confusion matrix section for this detector + f.write("\nConfusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + + for cls in evaluated_classes_with_gt: + if cls in results_by_class: + res = results_by_class[cls] + best_thresh = res.get('best_threshold', 0.0) + cm = res.get('confusion_matrix', {}) + + tp = cm.get('TP', 0) + fp = cm.get('FP', 0) + fn = cm.get('FN', 0) + + precision = tp / (tp + fp) if (tp + fp) > 0 else 0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0 + + f.write(f"{cls:<15} | {best_thresh:<10.3f} | {tp:<5} | {fp:<5} | {fn:<5} | {precision:<10.4f} | {recall:<10.4f}\n") + + else: + overall_mAPs[detector_name] = np.nan # Indicate mAP couldn't be computed + print("-" * 85) + print(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}") + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {'N/A (No GT in evaluated classes)':<10}\n") + + + ######## Overall Comparison Table + # Check if there are results to compare + if len(final_detector_names) > 0 and results_all_detectors: + print("\n[LOG] Overall Class AP Comparison") + f.write("\n" + "="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + # Truncate long detector names for table header + header_names = [name[:12] for name in final_detector_names] # Limit name length + header = f"{'Class':<15}" + "".join([f" | {name:<12}" for name in header_names]) + print(header) + f.write(header + "\n") + print("-" * len(header)) + f.write("-" * len(header) + "\n") + + # Iterate through user-specified classes for the table rows + for cls in args.classes: + aps_str = "" + num_gt_total_for_class = sum(len(gt_boxes.get(cls, [])) for gt_boxes in gt_boxes_all_samples.values()) + + # Get AP for each detector for this class + for detector_name in final_detector_names: + # Retrieve AP using .get chains safely + ap_val = results_all_detectors.get(detector_name, {}).get(cls, {}).get('ap', np.nan) + + # Decide display value based on AP and total GT count + if np.isnan(ap_val): + # Show 0.0000 if GT existed, else N/A + display_val = f"{0.0:<12.4f}" if num_gt_total_for_class > 0 else f"{'N/A':<12}" + else: + # Format valid AP value + display_val = f"{ap_val:<12.4f}" + aps_str += f" | {display_val}" + + line = f"{cls:<15}" + aps_str + print(line) + f.write(line + "\n") + + ######## Overall mAP Comparison + print("-" * len(header)) + f.write("-" * len(header) + "\n") + map_str = "" + for detector_name in final_detector_names: + map_val = overall_mAPs.get(detector_name, np.nan) # Get calculated mAP + map_str += f" | {map_val:<12.4f}" if not np.isnan(map_val) else f" | {'N/A':<12}" + map_line = f"{'mAP':<15}" + map_str + print(map_line) + f.write(map_line + "\n") + else: + print("\n[LOG] Skipping overall comparison table - no results available.") + f.write("\n[LOG] 
Skipping overall comparison table - no results available.\n") + + print(f"\nMetrics saved to: {results_file_path}") + + except Exception as e: + print(f"[LOG] Error during results reporting/saving: {e}") + traceback.print_exc() + # Plot + # return # Or stop + + ########## Export Confusion Matrix CSV + try: + export_confusion_matrix(results_all_detectors, args.output_dir, args.classes) + except Exception as e: + print(f"[LOG] Error exporting confusion matrix: {e}") + traceback.print_exc() + + ########## Plotting + print("\n[LOG] Generating PR curves...") + try: + # Check if results_all_detectors has data before plotting + if results_all_detectors and any(results_all_detectors.values()): + # Pass only valid detector results to plotting function + # Filter out detectors that were skipped or had errors resulting in no results dict + valid_results = {k: v for k, v in results_all_detectors.items() if isinstance(v, dict) and k in final_detector_names} + if valid_results: + plot_pr_curves(valid_results, args.classes, args.output_dir) + print(f"[LOG] PR curve plots potentially saved in: {args.output_dir}") + else: + print("[LOG] Skipping plotting - no valid evaluation results were generated.") + else: + print("[LOG] Skipping plotting - no evaluation results generated.") + + except Exception as e: + print(f"Error during plotting: {e}") + traceback.print_exc() + + print("\n[LOG] Evaluation finished.") + + +if __name__ == '__main__': + main() diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt similarity index 100% rename from GEMstack/onboard/perception/sensorFusion/pointpillars_ros/CmakeLists.txt rename to GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch similarity index 100% rename from GEMstack/onboard/perception/sensorFusion/pointpillars_ros/launch/pointpillars_test.launch rename to GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/package.xml similarity index 100% rename from GEMstack/onboard/perception/sensorFusion/pointpillars_ros/package.xml rename to GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/package.xml diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md new file mode 100644 index 000000000..fe8e3ab07 --- /dev/null +++ b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md @@ -0,0 +1 @@ +Please see GEMstack\offboard\perception\sensorFusionTesting\pointpillars_ros\scripts\pointpillars_ros_node.py for running tests. 
\ No newline at end of file diff --git a/GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py b/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py similarity index 100% rename from GEMstack/onboard/perception/sensorFusion/pointpillars_ros/scripts/pointpillars_ros_node.py rename to GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py diff --git a/GEMstack/onboard/perception/sensorFusion/rosbag_processor.py b/GEMstack/offboard/perception/sensorFusionTesting/rosbag_processor.py similarity index 100% rename from GEMstack/onboard/perception/sensorFusion/rosbag_processor.py rename to GEMstack/offboard/perception/sensorFusionTesting/rosbag_processor.py diff --git a/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py b/GEMstack/offboard/perception/sensorFusionTesting/runner_eval_3d_model_performance.py similarity index 97% rename from GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py rename to GEMstack/offboard/perception/sensorFusionTesting/runner_eval_3d_model_performance.py index c20e1d392..26629b1dc 100644 --- a/GEMstack/onboard/perception/sensorFusion/runner_eval_3d_model_performance.py +++ b/GEMstack/offboard/perception/sensorFusionTesting/runner_eval_3d_model_performance.py @@ -1,321 +1,321 @@ -import os -import sys -import numpy as np -import traceback -import shutil - - -print(""" -######################################################### -# 3D Bounding Box Evaluation Tool -######################################################### - -This tool evaluates the performance of multiple 3D object detection models -by calculating metrics including: - -1. Confusion Matrix Metrics: - - True Positives (TP), False Positives (FP), False Negatives (FN) - - Precision, Recall, and AP for each class and detector - -2. Confidence Score Analysis: - - Evaluates detector performance at different confidence thresholds - - Automatically determines optimal confidence threshold per class - -3. Visualization: - - PR curves comparing different detection models - - Threshold sensitivity curves showing performance across thresholds - -All metrics are saved to CSV files and visualization plots for detailed analysis. 
-######################################################### -""") - -eval_script_filename = "eval_3d_model_performance.py" -eval_module_name = "eval_3d_model_performance" -try: - if not os.path.exists(eval_script_filename): - raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.") - - # Attempt the standard import - imported_module = __import__(eval_module_name) - print(f"Module '{eval_module_name}' imported successfully.") - -except: - print("##################") - print(f"{eval_script_filename} import error") - print("##################") - exit(1) - -####### Setup paths for data directories - -# Root directory with data files -project_dir = "./data" - -# Ground truth directory (KITTI format label files) -gt_dir = os.path.join(project_dir, "ground_truth/label_2") - -# Prediction directories for different models (results in KITTI format) -pred1_dir = os.path.join(project_dir, "predictions/YOLO_lidar_gem_combined/results") -pred2_dir = os.path.join(project_dir, "predictions/pointpillars/results") -pred3_dir = os.path.join(project_dir, "predictions/yolo_3d/results") - -# Output directory for evaluation results -base_output_dir = "./evaluation_results" -eval_output_dir = os.path.join(base_output_dir, "model_comparison") - -# Ensure output directory exists -if not os.path.exists(eval_output_dir): - os.makedirs(eval_output_dir) - -# Information about data locations -print("\n[LOG] Using data files from the following locations:") -print(f"Ground truth directory: {gt_dir}") -print(f"Prediction directories:") -print(f" - {pred1_dir} (YOLO_lidar_gem_combined)") -print(f" - {pred2_dir} (PointPillars)") -print(f" - {pred3_dir} (YOLO-3D)") - -# Define detector names for the evaluation -detector_names = ['YOLO_lidar_gem_combined', 'PointPillars', 'YOLO_3D'] -pred_dirs = [pred1_dir, pred2_dir, pred3_dir] - -####### Run the Evaluation Script - -print(f"\n[LOG] Running 3D detection evaluation...") -cmd_args = [ - gt_dir, - *pred_dirs, # All prediction directories - eval_output_dir, - '--detector_names', *detector_names, - '--iou_threshold', '0.7', - '--classes', 'Car', 'Pedestrian', 'Cyclist', - '--confidence_thresholds', '0.0', '0.3', '0.5', '0.7', '0.9' -] - -original_argv = sys.argv -sys.argv = [eval_script_filename] + cmd_args -exit_code = 0 - -try: - # Create sample output files to demonstrate format - # (In a real run with existing files, we would call imported_module.main() directly) - - print("\n[LOG] In a real evaluation with existing data files, we would process:") - for dir_path in [gt_dir] + pred_dirs: - print(f" - {dir_path}") - print("\n[LOG] Generating sample output to demonstrate the format...") - - # Create output directories - if not os.path.exists(eval_output_dir): - os.makedirs(eval_output_dir) - - # Create a sample metrics file to demonstrate the output format - metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') - with open(metrics_file, 'w') as f: - f.write("Evaluation Results (IoU Threshold: 0.7)\n") - f.write("Evaluated Classes: Car, Pedestrian, Cyclist\n") - f.write("Confidence Thresholds: [0.0, 0.3, 0.5, 0.7, 0.9]\n") - f.write("="*60 + "\n\n") - - for detector in detector_names: - f.write(f"Detector: {detector}\n") - f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") - f.write("-" * 85 + "\n") - - # Sample results for each class - f.write(f"{'Car':<15} | {0.8765:<10.4f} | {142:<10} | {156:<10} | {0.7:<11.3f} | {120:<5} | 
{36:<5} | {22:<5}\n") - f.write(f"{'Pedestrian':<15} | {0.7123:<10.4f} | {85:<10} | {102:<10} | {0.5:<11.3f} | {65:<5} | {37:<5} | {20:<5}\n") - f.write(f"{'Cyclist':<15} | {0.6897:<10.4f} | {32:<10} | {41:<10} | {0.3:<11.3f} | {24:<5} | {17:<5} | {8:<5}\n") - - f.write("-" * 85 + "\n") - f.write(f"{'mAP':<15} | {0.7595:<10.4f} (Classes w/ GT: Car, Pedestrian, Cyclist)\n\n") - - # Confusion matrix summary - f.write("Confusion Matrix Summary (at best threshold per class):\n") - f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") - f.write("-" * 75 + "\n") - f.write(f"{'Car':<15} | {0.7:<10.3f} | {120:<5} | {36:<5} | {22:<5} | {0.7692:<10.4f} | {0.8451:<10.4f}\n") - f.write(f"{'Pedestrian':<15} | {0.5:<10.3f} | {65:<5} | {37:<5} | {20:<5} | {0.6373:<10.4f} | {0.7647:<10.4f}\n") - f.write(f"{'Cyclist':<15} | {0.3:<10.3f} | {24:<5} | {17:<5} | {8:<5} | {0.5854:<10.4f} | {0.7500:<10.4f}\n\n") - - # Overall comparison - f.write("="*60 + "\n") - f.write("Overall Class AP Comparison\n") - f.write("="*60 + "\n") - f.write(f"{'Class':<15} | {'YOLO_lidar_gem_combined':<24} | {'PointPillars':<12} | {'YOLO_3D':<12}\n") - f.write("-" * 68 + "\n") - f.write(f"{'Car':<15} | {0.9012:<24.4f} | {0.8234:<12.4f} | {0.8765:<12.4f}\n") - f.write(f"{'Pedestrian':<15} | {0.7789:<24.4f} | {0.7456:<12.4f} | {0.7123:<12.4f}\n") - f.write(f"{'Cyclist':<15} | {0.7234:<24.4f} | {0.6345:<12.4f} | {0.6897:<12.4f}\n") - f.write("-" * 68 + "\n") - f.write(f"{'mAP':<15} | {0.8012:<24.4f} | {0.7345:<12.4f} | {0.7595:<12.4f}\n") - - # Create confusion matrix directory and files - confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') - if not os.path.exists(confusion_dir): - os.makedirs(confusion_dir) - - # Sample summary CSV file - summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') - with open(summary_file, 'w') as f: - f.write("Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP\n") - f.write("YOLO_lidar_gem_combined,Car,* 0.700,120,36,22,0,0.7692,0.8451,0.8765\n") - f.write("YOLO_lidar_gem_combined,Car,0.000,142,85,0,0,0.6256,1.0000,0.7456\n") - f.write("YOLO_lidar_gem_combined,Car,0.300,135,65,7,0,0.6750,0.9507,0.7890\n") - f.write("YOLO_lidar_gem_combined,Car,0.500,128,48,14,0,0.7273,0.9014,0.8234\n") - f.write("YOLO_lidar_gem_combined,Car,0.700,120,36,22,0,0.7692,0.8451,0.8765\n") - f.write("YOLO_lidar_gem_combined,Car,0.900,95,18,47,0,0.8407,0.6690,0.7123\n") - - # Create PR curves directory - pr_dir = os.path.join(eval_output_dir, 'pr_curves') - if not os.path.exists(pr_dir): - os.makedirs(pr_dir) - - print(f"\n[LOG] Created sample output files in {eval_output_dir}") - - # NOTE: In a production environment with real data, - # uncomment this line to run the actual evaluation: - # imported_module.main() - -except Exception as e: - traceback.print_exc() - exit_code = 1 -finally: - # Restore original sys.argv - sys.argv = original_argv - -####### Output files and results - -print("\n--- Generated Output Files ---") -if os.path.exists(eval_output_dir): - try: - for root, dirs, files in os.walk(eval_output_dir): - rel_path = os.path.relpath(root, eval_output_dir) - if rel_path == '.': - print(f"Root output directory:") - else: - print(f"\nSubdirectory: {rel_path}") - - for file in sorted(files): - print(f" - {os.path.join(rel_path, file)}") - except Exception as e: - print(f"Error listing output directory {eval_output_dir}: {e}") -else: - print(f"Output directory '{eval_output_dir}' not created or accessible.") - -# 
Display sample -metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') -if exit_code == 0 and os.path.exists(metrics_file): - print(f"\n--- Sample of evaluation_metrics.txt ---") - try: - with open(metrics_file, 'r') as f: - lines = f.readlines() - # Print header and first detector results (truncated) - for i, line in enumerate(lines): - if i < 15: # Just show the beginning - print(line.strip()) - print("... (output truncated)") - except Exception as e: - print(f"Error reading metrics file {metrics_file}: {e}") - -# Display sample of confusion matrix data -confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') -if os.path.exists(confusion_dir): - summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') - if os.path.exists(summary_file): - print(f"\n--- Sample of confusion matrix data ---") - try: - with open(summary_file, 'r') as f: - # Print header and first few lines - for i, line in enumerate(f): - if i < 5: # Just show the beginning - print(line.strip()) - print("... (output truncated)") - except Exception as e: - print(f"Error reading confusion matrix summary: {e}") - -print(f"\n[LOG] Evaluation complete. Results saved to: {eval_output_dir}") - -####### Testing utilities (for development only) - -""" -def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, - is_pred=False, noise_level=0.1, score_range=(0.5, 1.0), seed=42): - ''' - Generates dummy data files in KITTI format for testing. - - Args: - base_dir: Directory to create files in - num_samples: Number of sample files to create - classes: List of classes to include - boxes_per_sample: Maximum number of boxes per sample - is_pred: Whether to create prediction data (includes confidence scores) - noise_level: Level of noise to add to prediction coordinates - score_range: Range of confidence scores for predictions (min, max) - seed: Random seed for reproducibility - ''' - if os.path.exists(base_dir): - shutil.rmtree(base_dir) # Clean previous runs - os.makedirs(base_dir) - np.random.seed(seed) # reproducibility - - for i in range(num_samples): - filename = os.path.join(base_dir, f"{i:06d}.txt") - with open(filename, 'w') as f: - num_boxes = np.random.randint(1, boxes_per_sample + 1) - for _ in range(num_boxes): - cls = np.random.choice(classes) - - # Generate box parameters - h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height - w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width - l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length - - loc_x = np.random.uniform(-15, 15) # center x (lateral) - loc_z = np.random.uniform(5, 50) # center z (depth) - loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin - rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw - - # Placeholder values - truncated = 0.0 - occluded = 0 # 0=visible - alpha = -10 - bbox_2d = [0.0, 0.0, 50.0, 50.0] - - # Set confidence score - score = np.random.uniform(score_range[0], score_range[1]) - - # Add noise for predictions - if is_pred: - h *= np.random.normal(1, noise_level * 0.1) - w *= np.random.normal(1, noise_level * 0.1) - l *= np.random.normal(1, noise_level * 0.1) - loc_x += np.random.normal(0, noise_level * 1.0) - loc_y_bottom += np.random.normal(0, noise_level * 0.1) - loc_z += np.random.normal(0, noise_level * 3.0) - rot_y += np.random.normal(0, noise_level * np.pi/8) - h, w, l = max(0.1, h), max(0.1, w), 
max(0.1, l) # Ensure positive dimensions - - # Format the line string to KITTI standard - line_parts = [ - cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", - f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", - f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", - f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", - f"{rot_y:.2f}" - ] - - if is_pred: - line_parts.append(f"{score:.4f}") - - f.write(" ".join(line_parts) + "\n") - -# Example usage to generate test data: -# base_dir = "./test_data" -# gt_dir = os.path.join(base_dir, "gt") -# pred1_dir = os.path.join(base_dir, "pred1") -# pred2_dir = os.path.join(base_dir, "pred2") -# -# create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False) -# create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.6, 0.9)) -# create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.3, 0.95)) -""" +import os +import sys +import numpy as np +import traceback +import shutil + + +print(""" +######################################################### +# 3D Bounding Box Evaluation Tool +######################################################### + +This tool evaluates the performance of multiple 3D object detection models +by calculating metrics including: + +1. Confusion Matrix Metrics: + - True Positives (TP), False Positives (FP), False Negatives (FN) + - Precision, Recall, and AP for each class and detector + +2. Confidence Score Analysis: + - Evaluates detector performance at different confidence thresholds + - Automatically determines optimal confidence threshold per class + +3. Visualization: + - PR curves comparing different detection models + - Threshold sensitivity curves showing performance across thresholds + +All metrics are saved to CSV files and visualization plots for detailed analysis. 
+######################################################### +""") + +eval_script_filename = "eval_3d_model_performance.py" +eval_module_name = "eval_3d_model_performance" +try: + if not os.path.exists(eval_script_filename): + raise FileNotFoundError(f"Evaluation script '{eval_script_filename}' not found in the current directory.") + + # Attempt the standard import + imported_module = __import__(eval_module_name) + print(f"Module '{eval_module_name}' imported successfully.") + +except: + print("##################") + print(f"{eval_script_filename} import error") + print("##################") + exit(1) + +####### Setup paths for data directories + +# Root directory with data files +project_dir = "./data" + +# Ground truth directory (KITTI format label files) +gt_dir = os.path.join(project_dir, "ground_truth/label_2") + +# Prediction directories for different models (results in KITTI format) +pred1_dir = os.path.join(project_dir, "predictions/YOLO_lidar_gem_combined/results") +pred2_dir = os.path.join(project_dir, "predictions/pointpillars/results") +pred3_dir = os.path.join(project_dir, "predictions/yolo_3d/results") + +# Output directory for evaluation results +base_output_dir = "./evaluation_results" +eval_output_dir = os.path.join(base_output_dir, "model_comparison") + +# Ensure output directory exists +if not os.path.exists(eval_output_dir): + os.makedirs(eval_output_dir) + +# Information about data locations +print("\n[LOG] Using data files from the following locations:") +print(f"Ground truth directory: {gt_dir}") +print(f"Prediction directories:") +print(f" - {pred1_dir} (YOLO_lidar_gem_combined)") +print(f" - {pred2_dir} (PointPillars)") +print(f" - {pred3_dir} (YOLO-3D)") + +# Define detector names for the evaluation +detector_names = ['YOLO_lidar_gem_combined', 'PointPillars', 'YOLO_3D'] +pred_dirs = [pred1_dir, pred2_dir, pred3_dir] + +####### Run the Evaluation Script + +print(f"\n[LOG] Running 3D detection evaluation...") +cmd_args = [ + gt_dir, + *pred_dirs, # All prediction directories + eval_output_dir, + '--detector_names', *detector_names, + '--iou_threshold', '0.7', + '--classes', 'Car', 'Pedestrian', 'Cyclist', + '--confidence_thresholds', '0.0', '0.3', '0.5', '0.7', '0.9' +] + +original_argv = sys.argv +sys.argv = [eval_script_filename] + cmd_args +exit_code = 0 + +try: + # Create sample output files to demonstrate format + # (In a real run with existing files, we would call imported_module.main() directly) + + print("\n[LOG] In a real evaluation with existing data files, we would process:") + for dir_path in [gt_dir] + pred_dirs: + print(f" - {dir_path}") + print("\n[LOG] Generating sample output to demonstrate the format...") + + # Create output directories + if not os.path.exists(eval_output_dir): + os.makedirs(eval_output_dir) + + # Create a sample metrics file to demonstrate the output format + metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') + with open(metrics_file, 'w') as f: + f.write("Evaluation Results (IoU Threshold: 0.7)\n") + f.write("Evaluated Classes: Car, Pedestrian, Cyclist\n") + f.write("Confidence Thresholds: [0.0, 0.3, 0.5, 0.7, 0.9]\n") + f.write("="*60 + "\n\n") + + for detector in detector_names: + f.write(f"Detector: {detector}\n") + f.write(f"{'Class':<15} | {'AP':<10} | {'Num GT':<10} | {'Num Pred':<10} | {'Best Thresh':<11} | {'TP':<5} | {'FP':<5} | {'FN':<5}\n") + f.write("-" * 85 + "\n") + + # Sample results for each class + f.write(f"{'Car':<15} | {0.8765:<10.4f} | {142:<10} | {156:<10} | {0.7:<11.3f} | {120:<5} | 
{36:<5} | {22:<5}\n") + f.write(f"{'Pedestrian':<15} | {0.7123:<10.4f} | {85:<10} | {102:<10} | {0.5:<11.3f} | {65:<5} | {37:<5} | {20:<5}\n") + f.write(f"{'Cyclist':<15} | {0.6897:<10.4f} | {32:<10} | {41:<10} | {0.3:<11.3f} | {24:<5} | {17:<5} | {8:<5}\n") + + f.write("-" * 85 + "\n") + f.write(f"{'mAP':<15} | {0.7595:<10.4f} (Classes w/ GT: Car, Pedestrian, Cyclist)\n\n") + + # Confusion matrix summary + f.write("Confusion Matrix Summary (at best threshold per class):\n") + f.write(f"{'Class':<15} | {'Threshold':<10} | {'TP':<5} | {'FP':<5} | {'FN':<5} | {'Precision':<10} | {'Recall':<10}\n") + f.write("-" * 75 + "\n") + f.write(f"{'Car':<15} | {0.7:<10.3f} | {120:<5} | {36:<5} | {22:<5} | {0.7692:<10.4f} | {0.8451:<10.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.5:<10.3f} | {65:<5} | {37:<5} | {20:<5} | {0.6373:<10.4f} | {0.7647:<10.4f}\n") + f.write(f"{'Cyclist':<15} | {0.3:<10.3f} | {24:<5} | {17:<5} | {8:<5} | {0.5854:<10.4f} | {0.7500:<10.4f}\n\n") + + # Overall comparison + f.write("="*60 + "\n") + f.write("Overall Class AP Comparison\n") + f.write("="*60 + "\n") + f.write(f"{'Class':<15} | {'YOLO_lidar_gem_combined':<24} | {'PointPillars':<12} | {'YOLO_3D':<12}\n") + f.write("-" * 68 + "\n") + f.write(f"{'Car':<15} | {0.9012:<24.4f} | {0.8234:<12.4f} | {0.8765:<12.4f}\n") + f.write(f"{'Pedestrian':<15} | {0.7789:<24.4f} | {0.7456:<12.4f} | {0.7123:<12.4f}\n") + f.write(f"{'Cyclist':<15} | {0.7234:<24.4f} | {0.6345:<12.4f} | {0.6897:<12.4f}\n") + f.write("-" * 68 + "\n") + f.write(f"{'mAP':<15} | {0.8012:<24.4f} | {0.7345:<12.4f} | {0.7595:<12.4f}\n") + + # Create confusion matrix directory and files + confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') + if not os.path.exists(confusion_dir): + os.makedirs(confusion_dir) + + # Sample summary CSV file + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + with open(summary_file, 'w') as f: + f.write("Detector,Class,Threshold,TP,FP,FN,TN,Precision,Recall,AP\n") + f.write("YOLO_lidar_gem_combined,Car,* 0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + f.write("YOLO_lidar_gem_combined,Car,0.000,142,85,0,0,0.6256,1.0000,0.7456\n") + f.write("YOLO_lidar_gem_combined,Car,0.300,135,65,7,0,0.6750,0.9507,0.7890\n") + f.write("YOLO_lidar_gem_combined,Car,0.500,128,48,14,0,0.7273,0.9014,0.8234\n") + f.write("YOLO_lidar_gem_combined,Car,0.700,120,36,22,0,0.7692,0.8451,0.8765\n") + f.write("YOLO_lidar_gem_combined,Car,0.900,95,18,47,0,0.8407,0.6690,0.7123\n") + + # Create PR curves directory + pr_dir = os.path.join(eval_output_dir, 'pr_curves') + if not os.path.exists(pr_dir): + os.makedirs(pr_dir) + + print(f"\n[LOG] Created sample output files in {eval_output_dir}") + + # NOTE: In a production environment with real data, + # uncomment this line to run the actual evaluation: + # imported_module.main() + +except Exception as e: + traceback.print_exc() + exit_code = 1 +finally: + # Restore original sys.argv + sys.argv = original_argv + +####### Output files and results + +print("\n--- Generated Output Files ---") +if os.path.exists(eval_output_dir): + try: + for root, dirs, files in os.walk(eval_output_dir): + rel_path = os.path.relpath(root, eval_output_dir) + if rel_path == '.': + print(f"Root output directory:") + else: + print(f"\nSubdirectory: {rel_path}") + + for file in sorted(files): + print(f" - {os.path.join(rel_path, file)}") + except Exception as e: + print(f"Error listing output directory {eval_output_dir}: {e}") +else: + print(f"Output directory '{eval_output_dir}' not created or accessible.") + +# 
Display sample +metrics_file = os.path.join(eval_output_dir, 'evaluation_metrics.txt') +if exit_code == 0 and os.path.exists(metrics_file): + print(f"\n--- Sample of evaluation_metrics.txt ---") + try: + with open(metrics_file, 'r') as f: + lines = f.readlines() + # Print header and first detector results (truncated) + for i, line in enumerate(lines): + if i < 15: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading metrics file {metrics_file}: {e}") + +# Display sample of confusion matrix data +confusion_dir = os.path.join(eval_output_dir, 'confusion_matrices') +if os.path.exists(confusion_dir): + summary_file = os.path.join(confusion_dir, 'confusion_matrix_summary.csv') + if os.path.exists(summary_file): + print(f"\n--- Sample of confusion matrix data ---") + try: + with open(summary_file, 'r') as f: + # Print header and first few lines + for i, line in enumerate(f): + if i < 5: # Just show the beginning + print(line.strip()) + print("... (output truncated)") + except Exception as e: + print(f"Error reading confusion matrix summary: {e}") + +print(f"\n[LOG] Evaluation complete. Results saved to: {eval_output_dir}") + +####### Testing utilities (for development only) + +""" +def create_dummy_kitti_data(base_dir, num_samples=3, classes=['Car', 'Pedestrian'], boxes_per_sample=5, + is_pred=False, noise_level=0.1, score_range=(0.5, 1.0), seed=42): + ''' + Generates dummy data files in KITTI format for testing. + + Args: + base_dir: Directory to create files in + num_samples: Number of sample files to create + classes: List of classes to include + boxes_per_sample: Maximum number of boxes per sample + is_pred: Whether to create prediction data (includes confidence scores) + noise_level: Level of noise to add to prediction coordinates + score_range: Range of confidence scores for predictions (min, max) + seed: Random seed for reproducibility + ''' + if os.path.exists(base_dir): + shutil.rmtree(base_dir) # Clean previous runs + os.makedirs(base_dir) + np.random.seed(seed) # reproducibility + + for i in range(num_samples): + filename = os.path.join(base_dir, f"{i:06d}.txt") + with open(filename, 'w') as f: + num_boxes = np.random.randint(1, boxes_per_sample + 1) + for _ in range(num_boxes): + cls = np.random.choice(classes) + + # Generate box parameters + h = np.random.uniform(1.4, 1.8) if cls == 'Car' else np.random.uniform(1.5, 1.9) # height + w = np.random.uniform(1.5, 2.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # width + l = np.random.uniform(3.5, 5.0) if cls == 'Car' else np.random.uniform(0.5, 1.0) # length + + loc_x = np.random.uniform(-15, 15) # center x (lateral) + loc_z = np.random.uniform(5, 50) # center z (depth) + loc_y_bottom = np.random.uniform(1.6, 1.7) # Approximate height of bottom relative to camera origin + rot_y = np.random.uniform(-np.pi/2, np.pi/2) # Yaw + + # Placeholder values + truncated = 0.0 + occluded = 0 # 0=visible + alpha = -10 + bbox_2d = [0.0, 0.0, 50.0, 50.0] + + # Set confidence score + score = np.random.uniform(score_range[0], score_range[1]) + + # Add noise for predictions + if is_pred: + h *= np.random.normal(1, noise_level * 0.1) + w *= np.random.normal(1, noise_level * 0.1) + l *= np.random.normal(1, noise_level * 0.1) + loc_x += np.random.normal(0, noise_level * 1.0) + loc_y_bottom += np.random.normal(0, noise_level * 0.1) + loc_z += np.random.normal(0, noise_level * 3.0) + rot_y += np.random.normal(0, noise_level * np.pi/8) + h, w, l = max(0.1, h), max(0.1, w), 
max(0.1, l) # Ensure positive dimensions + + # Format the line string to KITTI standard + line_parts = [ + cls, f"{truncated:.2f}", f"{occluded:d}", f"{alpha:.2f}", + f"{bbox_2d[0]:.2f}", f"{bbox_2d[1]:.2f}", f"{bbox_2d[2]:.2f}", f"{bbox_2d[3]:.2f}", + f"{h:.2f}", f"{w:.2f}", f"{l:.2f}", + f"{loc_x:.2f}", f"{loc_y_bottom:.2f}", f"{loc_z:.2f}", + f"{rot_y:.2f}" + ] + + if is_pred: + line_parts.append(f"{score:.4f}") + + f.write(" ".join(line_parts) + "\n") + +# Example usage to generate test data: +# base_dir = "./test_data" +# gt_dir = os.path.join(base_dir, "gt") +# pred1_dir = os.path.join(base_dir, "pred1") +# pred2_dir = os.path.join(base_dir, "pred2") +# +# create_dummy_kitti_data(gt_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=False) +# create_dummy_kitti_data(pred1_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.6, 0.9)) +# create_dummy_kitti_data(pred2_dir, num_samples=5, classes=['Car', 'Pedestrian'], is_pred=True, score_range=(0.3, 0.95)) +""" diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index 59ab5edae..bb7bd84b4 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,7 +1,7 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -from .perception_utils import pose_to_matrix +from .perception_utils import pose_to_matrix, calculate_3d_iou from .perception_utils_gem import * from typing import Dict, List, Optional, Tuple import rospy @@ -14,7 +14,6 @@ from jsk_recognition_msgs.msg import BoundingBox, BoundingBoxArray from geometry_msgs.msg import Quaternion -from .sensorFusion.eval_3d_model_performance import calculate_3d_iou import copy diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py index fd68e43f3..af246c15d 100644 --- a/GEMstack/onboard/perception/perception_utils.py +++ b/GEMstack/onboard/perception/perception_utils.py @@ -216,4 +216,52 @@ def pose_to_matrix(pose): T = np.eye(4) T[:3, :3] = R_mat T[:3, 3] = np.array([x, y, z]) - return T \ No newline at end of file + return T + +def calculate_3d_iou(box1, box2, get_corners_cb, get_volume_cb): + """ + Calculates the 3D Intersection over Union (IoU) between two bounding boxes. + + Args: + box1, box2: List or tuple representing a 3D bounding box in the + *internal standardized format*: [cx, cy, cz, l, w, h, yaw, ...] + where cy is the geometric center y. + get_corners_cb: Callback function to extract AABB corners from a box + get_volume_cb: Callback function to calculate volume of a box + + Returns: + float: The 3D IoU value. 
+
+ Note: boxes are compared as axis-aligned bounding boxes, so yaw is ignored.
+ """
+
+ ######### Simple Axis-Aligned Bounding Box (AABB) IoU
+ min_x1, max_x1, min_y1, max_y1, min_z1, max_z1 = get_corners_cb(box1)
+ min_x2, max_x2, min_y2, max_y2, min_z2, max_z2 = get_corners_cb(box2)
+
+ # Calculate intersection volume (overlap extents: x -> length, y -> height, z -> width)
+ inter_min_x = max(min_x1, min_x2)
+ inter_max_x = min(max_x1, max_x2)
+ inter_min_y = max(min_y1, min_y2)
+ inter_max_y = min(max_y1, max_y2)
+ inter_min_z = max(min_z1, min_z2)
+ inter_max_z = min(max_z1, max_z2)
+
+ inter_l = max(0, inter_max_x - inter_min_x)
+ inter_h = max(0, inter_max_y - inter_min_y)
+ inter_w = max(0, inter_max_z - inter_min_z)
+ intersection_volume = inter_l * inter_h * inter_w
+
+ # Calculate union volume
+ vol1 = get_volume_cb(box1)
+ vol2 = get_volume_cb(box2)
+ # Ensure volumes are positive and non-zero for stable IoU
+ vol1 = max(vol1, 1e-6)
+ vol2 = max(vol2, 1e-6)
+ union_volume = vol1 + vol2 - intersection_volume
+ union_volume = max(union_volume, 1e-6) # Avoid division by zero or very small numbers
+
+ iou = intersection_volume / union_volume
+ # Clamp IoU to [0, 1] range
+ iou = max(0.0, min(iou, 1.0))
+ return iou
\ No newline at end of file
From 0584d1927c8614cf1b10d3a2c33f799d3440c017 Mon Sep 17 00:00:00 2001
From: LucasEby
Date: Fri, 16 May 2025 14:32:28 -0500
Subject: [PATCH 5/8] Moved test files and made cones collidable
---
 GEMstack/onboard/perception/cone_detection.py | 6 +++---
 {GEMstack/onboard => testing}/perception/person_detector.py | 0
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename {GEMstack/onboard => testing}/perception/person_detector.py (100%)
diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py
index e2d438ace..af1280d67 100644
--- a/GEMstack/onboard/perception/cone_detection.py
+++ b/GEMstack/onboard/perception/cone_detection.py
@@ -367,7 +367,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]:
 outline=None,
 material=ObstacleMaterialEnum.TRAFFIC_CONE,
 state=state,
- collidable=False
+ collidable=True
 )
 else:
 updated_obstacle = old_state
@@ -382,7 +382,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]:
 outline=None,
 material=ObstacleMaterialEnum.TRAFFIC_CONE,
 state=state,
- collidable=False
+ collidable=True
 )
 obstacles[obstacle_id] = new_obstacle
 self.tracked_obstacles[obstacle_id] = new_obstacle
@@ -395,7 +395,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]:
 outline=None,
 material=ObstacleMaterialEnum.TRAFFIC_CONE,
 state=state,
- collidable=False
+ collidable=True
 )
 obstacles[obstacle_id] = new_obstacle
diff --git a/GEMstack/onboard/perception/person_detector.py b/testing/perception/person_detector.py
similarity index 100%
rename from GEMstack/onboard/perception/person_detector.py
rename to testing/perception/person_detector.py
From cd915f6f535da28f4bd37a968e85dcfc2efff6e8 Mon Sep 17 00:00:00 2001
From: Lukas Dumasius
Date: Fri, 16 May 2025 14:56:01 -0500
Subject: [PATCH 6/8] Move sensorFusion testing to testing/
---
 .../perception/sensorFusionTesting/eval_3d_model_performance.py | 0
 .../sensorFusionTesting/pointpillars_ros/CmakeLists.txt | 0
 .../pointpillars_ros/launch/pointpillars_test.launch | 0
 .../perception/sensorFusionTesting/pointpillars_ros/package.xml | 0
 .../perception/sensorFusionTesting/pointpillars_ros/readme.md | 0
 .../pointpillars_ros/scripts/pointpillars_ros_node.py | 0
 .../perception/sensorFusionTesting/rosbag_processor.py | 0
 .../sensorFusionTesting/runner_eval_3d_model_performance.py | 0
 8 files changed, 0 insertions(+), 0 
deletions(-) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/eval_3d_model_performance.py (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/pointpillars_ros/package.xml (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/pointpillars_ros/readme.md (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/rosbag_processor.py (100%) rename {GEMstack/offboard => testing}/perception/sensorFusionTesting/runner_eval_3d_model_performance.py (100%) diff --git a/GEMstack/offboard/perception/sensorFusionTesting/eval_3d_model_performance.py b/testing/perception/sensorFusionTesting/eval_3d_model_performance.py similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/eval_3d_model_performance.py rename to testing/perception/sensorFusionTesting/eval_3d_model_performance.py diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt b/testing/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt rename to testing/perception/sensorFusionTesting/pointpillars_ros/CmakeLists.txt diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch b/testing/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch rename to testing/perception/sensorFusionTesting/pointpillars_ros/launch/pointpillars_test.launch diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/package.xml b/testing/perception/sensorFusionTesting/pointpillars_ros/package.xml similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/package.xml rename to testing/perception/sensorFusionTesting/pointpillars_ros/package.xml diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md b/testing/perception/sensorFusionTesting/pointpillars_ros/readme.md similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/readme.md rename to testing/perception/sensorFusionTesting/pointpillars_ros/readme.md diff --git a/GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py b/testing/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py rename to testing/perception/sensorFusionTesting/pointpillars_ros/scripts/pointpillars_ros_node.py diff --git a/GEMstack/offboard/perception/sensorFusionTesting/rosbag_processor.py b/testing/perception/sensorFusionTesting/rosbag_processor.py similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/rosbag_processor.py rename to testing/perception/sensorFusionTesting/rosbag_processor.py diff --git 
a/GEMstack/offboard/perception/sensorFusionTesting/runner_eval_3d_model_performance.py b/testing/perception/sensorFusionTesting/runner_eval_3d_model_performance.py similarity index 100% rename from GEMstack/offboard/perception/sensorFusionTesting/runner_eval_3d_model_performance.py rename to testing/perception/sensorFusionTesting/runner_eval_3d_model_performance.py From 40bedbe63818a6058fa679d5a43c4f6d3303eb81 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Fri, 16 May 2025 15:47:41 -0500 Subject: [PATCH 7/8] Removed YOLO node, removed _gem utils file --- GEMstack/onboard/perception/README.md | 8 +- .../onboard/perception/combined_detection.py | 237 +++++++++++++++++- GEMstack/onboard/perception/cone_detection.py | 1 - .../perception/pedestrian_detection.py | 1 - .../onboard/perception/perception_utils.py | 36 ++- .../perception/perception_utils_gem.py | 35 --- 6 files changed, 269 insertions(+), 49 deletions(-) delete mode 100644 GEMstack/onboard/perception/perception_utils_gem.py diff --git a/GEMstack/onboard/perception/README.md b/GEMstack/onboard/perception/README.md index 33876d899..f6d3932f1 100644 --- a/GEMstack/onboard/perception/README.md +++ b/GEMstack/onboard/perception/README.md @@ -54,15 +54,11 @@ docker compose -f setup/docker-compose.yaml up ``` roscore ``` -6. Start up YOLO node (make sure you source first): -``` -python3 GEMstack/onboard/perception/yolo_node.py -``` -7. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first): +6. Run yaml file to start up the CombinedDetector3D GEMstack Component (make sure you source first): ``` python3 main.py --variant=detector_only launch/combined_detection.yaml ``` -8. Run a rosbag on a loop (make sure you source first): +7. Run a rosbag on a loop (make sure you source first): ``` rosbag play -l yourRosbagNameGoesHere.bag ``` diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index bb7bd84b4..b9a346f3b 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -1,8 +1,7 @@ from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum from ..interface.gem import GEMInterface from ..component import Component -from .perception_utils import pose_to_matrix, calculate_3d_iou -from .perception_utils_gem import * +from .perception_utils import * from typing import Dict, List, Optional, Tuple import rospy from message_filters import Subscriber, ApproximateTimeSynchronizer @@ -15,6 +14,12 @@ from geometry_msgs.msg import Quaternion import copy +from cv_bridge import CvBridge +from sensor_msgs.msg import PointCloud2, Image +from scipy.spatial.transform import Rotation as R +from ultralytics import YOLO +import cv2 +from .combined_detection_utils import add_bounding_box def average_yaw(yaw1, yaw2): @@ -223,10 +228,12 @@ class CombinedDetector3D(Component): def __init__( self, vehicle_interface: GEMInterface, - enable_tracking: bool = True, - use_start_frame: bool = True, + camera_name, + camera_calib_file, iou_threshold: float = 0.1, merge_mode: str = "Average", + enable_tracking: bool = True, + use_start_frame: bool = True, **kwargs ): self.vehicle_interface = vehicle_interface @@ -245,11 +252,39 @@ def __init__( self.yolo_topic = '/yolo_boxes' self.pp_topic = '/pointpillars_boxes' - self.debug = False rospy.loginfo(f"CombinedDetector3D Initialized. 
Subscribing to '{self.yolo_topic}' and '{self.pp_topic}'.")
 rospy.loginfo(f"Using merge mode: {self.merge_mode}")
 
+ self.latest_image = None
+ self.latest_lidar = None
+ self.bridge = CvBridge()
+ self.camera_name = camera_name
+ self.camera_front = (self.camera_name == 'front')
+ self.score_threshold = 0.4
+ self.debug = True
+
+ # Load camera intrinsics/extrinsics from YAML
+ with open(camera_calib_file, 'r') as f:
+ calib = yaml.safe_load(f)
+
+ # Expect structure:
+ # cameras:
+ # front:
+ # K: [[...], [...], [...]]
+ # D: [...]
+ # T_l2c: [[...], ..., [...]]
+ cam_cfg = calib['cameras'][self.camera_name]
+ self.K = np.array(cam_cfg['K'])
+ self.D = np.array(cam_cfg['D'])
+ self.T_l2c = np.array(cam_cfg['T_l2c'])
+ self.T_l2v = np.array(cam_cfg['T_l2v'])
+
+ self.undistort_map1 = None
+ self.undistort_map2 = None
+ self.camera_front = (self.camera_name == 'front')
+
+
 def rate(self) -> float:
 return 8.0
@@ -261,6 +296,33 @@ def state_outputs(self) -> list:
 def initialize(self):
 """Initialize subscribers and publishers."""
+ # --- Determine the correct RGB topic for this camera ---
+ rgb_topic_map = {
+ 'front': '/oak/rgb/image_raw',
+ 'front_right': '/camera_fr/arena_camera_node/image_raw',
+ # add additional camera mappings here if needed
+ }
+ rgb_topic = rgb_topic_map.get(
+ self.camera_name,
+ f'/{self.camera_name}/rgb/image_raw'
+ )
+
+ # Create bounding box publisher
+ self.pub_yolo = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
+ rospy.loginfo("YOLO detector initialized and waiting for messages.")
+
+ # Initialize the YOLO detector and move to GPU if available
+ self.detector = YOLO('yolov8n.pt')
+ self.detector.to('cuda')
+
+ # Subscribe to the RGB and LiDAR streams
+ self.rgb_sub = Subscriber(rgb_topic, Image)
+ self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
+ self.sync = ApproximateTimeSynchronizer([
+ self.rgb_sub, self.lidar_sub
+ ], queue_size=10, slop=0.1)
+ self.sync.registerCallback(self.synchronized_yolo_callback)
+
 self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray)
 self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray)
 self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1)
@@ -276,6 +338,171 @@ def initialize(self):
 self.sync.registerCallback(self.synchronized_callback)
 rospy.loginfo("CombinedDetector3D Subscribers Initialized.")
+ def synchronized_yolo_callback(self, image_msg, lidar_msg):
+ """Process synchronized RGB and LiDAR messages to detect pedestrians."""
+ rospy.loginfo("Received synchronized RGB and LiDAR messages")
+
+ # Convert image message to OpenCV format
+ try:
+ self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
+ except Exception as e:
+ rospy.logerr(f"Failed to convert image: {e}")
+ self.latest_image = None
+
+ # Convert LiDAR message to numpy array
+ self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)
+
+ # Gate guards against data not being present for both sensors:
+ if self.latest_image is None or self.latest_lidar is None:
+ return # Skip this frame; both sensors are required
+
+ latest_image = self.latest_image.copy()
+
+ # Optionally downsample LiDAR points
+ downsample = False
+ if downsample:
+ lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
+ else:
+ lidar_down = self.latest_lidar.copy()
+
+ if not self.camera_front:
+ undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D)
+ self.current_K = current_K
+ orig_H, orig_W = undistorted_img.shape[:2]
+
+ img_normal = undistorted_img
+ else:
+ img_normal = latest_image.copy()
+ undistorted_img = latest_image.copy()
+ orig_H, orig_W = latest_image.shape[:2]
+ self.current_K = self.K
+
+ # Run YOLO detection on the image (class 0 = person)
+ results_normal = self.detector(img_normal, conf=0.4, classes=[0])
+
+ # Transform the lidar points from the lidar frame to the camera frame, then
+ # project them into the image to "paint" them (essentially determine which
+ # points are associated with detected objects)
+ pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
+ projected_pts = project_points(pts_cam, self.current_K, lidar_down)
+ # What is returned:
+ # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position)
+ # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position)
+ # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame
+
+ # Create empty list of bounding boxes to fill and publish later
+ boxes = BoundingBoxArray()
+ boxes.header.frame_id = 'currentVehicleFrame'
+ boxes.header.stamp = lidar_msg.header.stamp
+
+ # Process YOLO detections
+ boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
+ conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else []
+
+ for i, box in enumerate(boxes_normal):
+ # Skip low confidence detections
+ if conf_scores[i] < self.score_threshold:
+ continue
+
+ # Calculate the 2D bounding box in the image
+ cx, cy, w, h = box
+ left = int(cx - w / 2)
+ right = int(cx + w / 2)
+ top = int(cy - h / 2)
+ bottom = int(cy + h / 2)
+
+ # Find LiDAR points that project to this box
+ mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) &
+ (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom))
+ roi_pts = projected_pts[mask]
+
+ # Ignore regions with too few points
+ if roi_pts.shape[0] < 5:
+ continue
+
+ # Get the 3D points corresponding to the box
+ points_3d = roi_pts[:, 2:5]
+ points_3d = filter_depth_points(points_3d, max_depth_diff=0.8)
+ refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
+ refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1)
+
+ if refined_cluster.shape[0] < 5:
+ continue
+
+ # Create a point cloud from the filtered points
+ pcd = o3d.geometry.PointCloud()
+ pcd.points = o3d.utility.Vector3dVector(refined_cluster)
+
+ # Get an oriented bounding box
+ obb = pcd.get_oriented_bounding_box()
+ refined_center = obb.center
+ dims = tuple(obb.extent)
+ R_lidar = obb.R.copy()
+
+ # We are assuming that dims[0] is height and dims[2] is length of obb.extent
+
+ # Transform from LiDAR to vehicle coordinates
+ refined_center_hom = np.append(refined_center, 1)
+ refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
+ refined_center_vehicle = refined_center_vehicle_hom[:3]
+
+ # Calculate rotation in vehicle frame
+ R_vehicle = self.T_l2v[:3, :3] @ R_lidar
+ yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0])
+
+ # Add the bounding box
+ boxes = add_bounding_box(
+ boxes=boxes,
+ 
frame_id='currentVehicleFrame',
+ stamp=lidar_msg.header.stamp,
+ x=refined_center_vehicle[0],
+ y=refined_center_vehicle[1],
+ z=refined_center_vehicle[2],
+ l=dims[2], # length
+ w=dims[1], # width
+ h=dims[0], # height
+ yaw=yaw,
+ conf_score=float(conf_scores[i]),
+ label=0 # person/pedestrian class
+ )
+
+ rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, "
+ f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
+ f"with score {conf_scores[i]:.2f}")
+
+ # Publish the bounding boxes
+ rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
+ self.pub_yolo.publish(boxes)
+
+ def undistort_image(self, image, K, D):
+ """Undistort an image using the camera calibration parameters."""
+ h, w = image.shape[:2]
+ newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
+
+ # Initialize the undistortion maps once and reuse them on subsequent frames
+ if self.undistort_map1 is None or self.undistort_map2 is None:
+ self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None,
+ newCameraMatrix=newK, size=(w, h),
+ m1type=cv2.CV_32FC1)
+
+ undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST)
+ return undistorted, newK
+
 def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
 """Callback for synchronized YOLO and PointPillars messages."""
 self.latest_yolo_bbxs = yolo_bbxs_msg
 self.latest_pp_bbxs = pp_bbxs_msg
diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py
index af1280d67..5d98dbfe3 100644
--- a/GEMstack/onboard/perception/cone_detection.py
+++ b/GEMstack/onboard/perception/cone_detection.py
@@ -3,7 +3,6 @@
 from ..interface.gem import GEMInterface
 from ..component import Component
 from .perception_utils import *
-from .perception_utils_gem import *
 from ultralytics import YOLO
 import cv2
 from typing import Dict
diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
index 8f1e41131..b7c748967 100644
--- a/GEMstack/onboard/perception/pedestrian_detection.py
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -2,7 +2,6 @@
 from ..interface.gem import GEMInterface
 from ..component import Component
 from .perception_utils import * # If you want to alias functions for clarity, do so in perception_utils
-from .perception_utils_gem import *
 from ultralytics import YOLO
 from typing import Dict
 import open3d as o3d
diff --git a/GEMstack/onboard/perception/perception_utils.py b/GEMstack/onboard/perception/perception_utils.py
index af246c15d..517f97bb9 100644
--- a/GEMstack/onboard/perception/perception_utils.py
+++ b/GEMstack/onboard/perception/perception_utils.py
@@ -7,10 +7,44 @@
 import sensor_msgs.point_cloud2 as pc2
 import ros_numpy
 import math
-
+from ...state import ObjectPose, AgentState
+from typing import Dict, Optional
+import numpy as np
 # ----- Helper Functions -----
+
+
+def match_existing_cone(
+ new_center: np.ndarray,
+ new_dims: tuple,
+ existing_agents: Dict[str, AgentState],
+ distance_threshold: float = 1.0
+) -> Optional[str]:
+ """
+ Find the closest existing Cone agent within a specified distance threshold. 
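+
+ Args:
+ new_center: (3,) array with the candidate cone center, in the same frame as the tracked agents' poses.
+ new_dims: (l, w, h) dimensions of the new detection (currently unused by the matching).
+ existing_agents: mapping from agent id to the tracked AgentState.
+ distance_threshold: maximum center-to-center distance, in the same units as the poses, to accept a match.
+
+ Returns:
+ The id of the closest matching agent, or None if no agent is within the threshold.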
+ """ + best_agent_id = None + best_dist = float('inf') + for agent_id, agent_state in existing_agents.items(): + old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) + dist = np.linalg.norm(new_center - old_center) + if dist < distance_threshold and dist < best_dist: + best_dist = dist + best_agent_id = agent_id + return best_agent_id + +def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: + """ + Compute the (vx, vy, vz) velocity based on change in pose over time. + """ + if dt <= 0: + return (0, 0, 0) + vx = (new_pose.x - old_pose.x) / dt + vy = (new_pose.y - old_pose.y) / dt + vz = (new_pose.z - old_pose.z) / dt + return (vx, vy, vz) + + def cylindrical_roi(points, center, radius, height): horizontal_dist = np.linalg.norm(points[:, :2] - center[:2], axis=1) vertical_diff = np.abs(points[:, 2] - center[2]) diff --git a/GEMstack/onboard/perception/perception_utils_gem.py b/GEMstack/onboard/perception/perception_utils_gem.py deleted file mode 100644 index e68e7db0c..000000000 --- a/GEMstack/onboard/perception/perception_utils_gem.py +++ /dev/null @@ -1,35 +0,0 @@ -from ...state import ObjectPose, AgentState -from typing import Dict -import numpy as np -# Had to move these functions to a seperate file because they use relative imports - - -def match_existing_cone( - new_center: np.ndarray, - new_dims: tuple, - existing_agents: Dict[str, AgentState], - distance_threshold: float = 1.0 -) -> str: - """ - Find the closest existing Cone agent within a specified distance threshold. - """ - best_agent_id = None - best_dist = float('inf') - for agent_id, agent_state in existing_agents.items(): - old_center = np.array([agent_state.pose.x, agent_state.pose.y, agent_state.pose.z]) - dist = np.linalg.norm(new_center - old_center) - if dist < distance_threshold and dist < best_dist: - best_dist = dist - best_agent_id = agent_id - return best_agent_id - -def compute_velocity(old_pose: ObjectPose, new_pose: ObjectPose, dt: float) -> tuple: - """ - Compute the (vx, vy, vz) velocity based on change in pose over time. - """ - if dt <= 0: - return (0, 0, 0) - vx = (new_pose.x - old_pose.x) / dt - vy = (new_pose.y - old_pose.y) / dt - vz = (new_pose.z - old_pose.z) / dt - return (vx, vy, vz) \ No newline at end of file From 10d9e023cdc1297716643089bcc03453edeea203 Mon Sep 17 00:00:00 2001 From: LucasEby Date: Sat, 17 May 2025 09:37:07 -0500 Subject: [PATCH 8/8] Improved YOLO integration into combined detector. Uploaded yaml file that was missed. Removed YOLO node that was missed. 
--- .../onboard/perception/combined_detection.py | 85 ++++-- GEMstack/onboard/perception/yolo_node.py | 265 ------------------ launch/combined_detection.yaml | 133 +++++++++ 3 files changed, 195 insertions(+), 288 deletions(-) delete mode 100644 GEMstack/onboard/perception/yolo_node.py create mode 100644 launch/combined_detection.yaml diff --git a/GEMstack/onboard/perception/combined_detection.py b/GEMstack/onboard/perception/combined_detection.py index b9a346f3b..8aa5b9b23 100644 --- a/GEMstack/onboard/perception/combined_detection.py +++ b/GEMstack/onboard/perception/combined_detection.py @@ -231,16 +231,20 @@ def __init__( camera_name, camera_calib_file, iou_threshold: float = 0.1, + score_threshold: float = 0.4, merge_mode: str = "Average", enable_tracking: bool = True, use_start_frame: bool = True, + slop = 0.1, + max_bb_buffer_size = 10, + visualize = True, **kwargs ): self.vehicle_interface = vehicle_interface self.tracked_agents: Dict[str, AgentState] = {} self.ped_counter = 0 - self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None self.latest_pp_bbxs: Optional[BoundingBoxArray] = None + self.latest_yolo_bbxs: Optional[BoundingBoxArray] = None self.start_pose_abs: Optional[ObjectPose] = None self.start_time: Optional[float] = None @@ -261,8 +265,8 @@ def __init__( self.bridge = CvBridge() self.camera_name = camera_name self.camera_front = (self.camera_name == 'front') - self.score_threshold = 0.4 - self.debug = True + self.score_threshold = score_threshold + self.visualize = visualize # Load camera intrinsics/extrinsics from YAML with open(camera_calib_file, 'r') as f: @@ -283,7 +287,10 @@ def __init__( self.undistort_map1 = None self.undistort_map2 = None self.camera_front = (self.camera_name == 'front') - + self.yolo_buffer: List[BoundingBoxArray] = [] + self.pp_buffer: List[BoundingBoxArray] = [] + self.max_bb_buffer_size = max_bb_buffer_size + self.slop = slop def rate(self) -> float: return 8.0 @@ -324,20 +331,52 @@ def initialize(self): self.sync.registerCallback(self.synchronized_yolo_callback) self.yolo_sub = Subscriber(self.yolo_topic, BoundingBoxArray) - self.pp_sub = Subscriber(self.pp_topic, BoundingBoxArray) + rospy.Subscriber(self.pp_topic, BoundingBoxArray, self.store_pp_array, queue_size=10) self.pub_fused = rospy.Publisher("/fused_boxes", BoundingBoxArray, queue_size=1) - queue_size = 10 - slop = 0.1 - - self.sync = ApproximateTimeSynchronizer( - [self.yolo_sub, self.pp_sub], - queue_size=queue_size, - slop=slop - ) - self.sync.registerCallback(self.synchronized_callback) rospy.loginfo("CombinedDetector3D Subscribers Initialized.") + def add_bb_array(self, bb_buffer: List[BoundingBoxArray], add_arr: BoundingBoxArray): + bb_buffer.append(add_arr) + + # If buffer exceeds max size, remove the YOLO bounding box array with the oldest timestamp + if len(bb_buffer) > self.max_bb_buffer_size: + oldest_index = min(range(len(bb_buffer)), + key=lambda i: bb_buffer[i].header.stamp.to_sec()) + del bb_buffer[oldest_index] + + def get_bb_arrays_sorted(self, bb_buffer: List[BoundingBoxArray]) -> List[BoundingBoxArray]: + return sorted(bb_buffer, key=lambda curr_bb_arr: curr_bb_arr.header.stamp.to_sec()) + + def store_pp_array(self, bbxs_msg: BoundingBoxArray): + self.add_bb_array(bb_buffer=self.pp_buffer, add_arr=bbxs_msg) # Store the boxes array for later comparison to YOLO bounding box array + self.try_match() + + def store_yolo_array(self, bbxs_msg: BoundingBoxArray): + self.add_bb_array(bb_buffer=self.yolo_buffer, add_arr=bbxs_msg) # Store the boxes array for later 
comparison to PointPillars bounding box array
+ self.try_match()
+
+ def try_match(self):
+ matched = []
+ matched_pp = set() # PointPillars arrays that have already been paired
+
+ for i, yolo_bbxs in enumerate(self.yolo_buffer):
+ time_yolo = yolo_bbxs.header.stamp.to_sec()
+
+ for j, pp_bbxs in enumerate(self.pp_buffer):
+ if j in matched_pp:
+ continue
+ time_pp = pp_bbxs.header.stamp.to_sec()
+
+ if abs(time_yolo - time_pp) <= self.slop:
+ matched_pair = (copy.deepcopy(yolo_bbxs), copy.deepcopy(pp_bbxs))
+ matched.append((i, j, matched_pair))
+ matched_pp.add(j)
+ break # Pair each YOLO array with at most one PointPillars array
+
+ # Remove the matched messages from both buffers, deleting indices in
+ # descending order so earlier deletions do not shift the remaining ones
+ for i in sorted((m[0] for m in matched), reverse=True):
+ del self.yolo_buffer[i]
+ for j in sorted((m[1] for m in matched), reverse=True):
+ del self.pp_buffer[j]
+ if matched:
+ # Hand the most recently buffered matched pair to the next update() cycle
+ yolo_bbxs, pp_bbxs = matched[-1][2]
+ self.latest_yolo_bbxs = yolo_bbxs
+ self.latest_pp_bbxs = pp_bbxs
+
 def synchronized_yolo_callback(self, image_msg, lidar_msg):
 """Process synchronized RGB and LiDAR messages to detect pedestrians."""
 rospy.loginfo("Received synchronized RGB and LiDAR messages")
@@ -482,9 +521,13 @@ def synchronized_yolo_callback(self, image_msg, lidar_msg):
 f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
 f"with score {conf_scores[i]:.2f}")
- # Publish the bounding boxes
- rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
- self.pub_yolo.publish(boxes)
+
+ self.store_yolo_array(bbxs_msg=boxes) # Store the boxes array for later comparison to the PointPillars bounding box arrays
+
+ # Publish the bounding boxes for visualization
+ if self.visualize:
+ rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
+ self.pub_yolo.publish(boxes)
 def undistort_image(self, image, K, D):
 """Undistort an image using the camera calibration parameters."""
 h, w = image.shape[:2]
@@ -503,11 +546,6 @@ def undistort_image(self, image, K, D):
 return undistorted, newK
- def synchronized_callback(self, yolo_bbxs_msg: BoundingBoxArray, pp_bbxs_msg: BoundingBoxArray):
- """Callback for synchronized YOLO and PointPillars messages."""
- self.latest_yolo_bbxs = yolo_bbxs_msg
- self.latest_pp_bbxs = pp_bbxs_msg
-
 def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
 """Update function called by the GEMstack pipeline."""
 current_time = self.vehicle_interface.time()
@@ -613,7 +651,8 @@ def update(self, vehicle: VehicleState) -> Dict[str, AgentState]:
 agents[agent_id] = new_agent
 self.tracked_agents[agent_id] = new_agent
- self.pub_fused.publish(fused_bb_array)
+ if self.visualize:
+ self.pub_fused.publish(fused_bb_array)
 stale_ids = [agent_id for agent_id, agent in self.tracked_agents.items()
 if current_time - agent.pose.t > 5.0]
diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py
deleted file mode 100644
index c4216884c..000000000
--- a/GEMstack/onboard/perception/yolo_node.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from perception_utils import *
-from combined_detection_utils import add_bounding_box
-from ultralytics import YOLO
-import cv2
-from scipy.spatial.transform import Rotation as R
-import rospy
-from sensor_msgs.msg import PointCloud2, Image
-from message_filters import Subscriber, ApproximateTimeSynchronizer
-from cv_bridge import CvBridge
-import time
-import os
-import yaml
-
-# Publisher imports:
-from jsk_recognition_msgs.msg import BoundingBoxArray
-# from geometry_msgs.msg import Pose, Vector3
-
-
-class YoloNode():
- """
- Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud
- data by painting the points. The painted data is converted to vehicle
- frame and then published as a list of bounding boxes. 
diff --git a/GEMstack/onboard/perception/yolo_node.py b/GEMstack/onboard/perception/yolo_node.py
deleted file mode 100644
index c4216884c..000000000
--- a/GEMstack/onboard/perception/yolo_node.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from perception_utils import *
-from combined_detection_utils import add_bounding_box
-from ultralytics import YOLO
-import cv2
-from scipy.spatial.transform import Rotation as R
-import rospy
-from sensor_msgs.msg import PointCloud2, Image
-from message_filters import Subscriber, ApproximateTimeSynchronizer
-from cv_bridge import CvBridge
-import time
-import os
-import yaml
-
-# Publisher imports:
-from jsk_recognition_msgs.msg import BoundingBoxArray
-# from geometry_msgs.msg import Pose, Vector3
-
-
-class YoloNode():
-    """
-    Detects Pedestrians by fusing YOLO 2D detections with LiDAR point cloud
-    data by painting the points. The painted data is converted to vehicle
-    frame and then published as a list of bounding boxes.
-
-    Tracking is optional: set `enable_tracking=False` to disable persistent tracking
-    and return only detections from the current frame.
-    """
-
-    def __init__(self, camera_calib_file: str):
-        self.latest_image = None
-        self.latest_lidar = None
-        self.bridge = CvBridge()
-        self.camera_name = 'front'
-        self.camera_front = (self.camera_name == 'front')
-        self.score_threshold = 0.4
-        self.debug = True
-
-        # Load camera intrinsics/extrinsics from YAML
-        with open(camera_calib_file, 'r') as f:
-            calib = yaml.safe_load(f)
-
-        # Expect structure:
-        # cameras:
-        #   front:
-        #     K: [[...], [...], [...]]
-        #     D: [...]
-        #     T_l2c: [[...], ..., [...]]
-        cam_cfg = calib['cameras'][self.camera_name]
-        self.K = np.array(cam_cfg['K'])
-        self.D = np.array(cam_cfg['D'])
-        self.T_l2c = np.array(cam_cfg['T_l2c'])
-        self.T_l2v = np.array(cam_cfg['T_l2v'])
-
-        self.undistort_map1 = None
-        self.undistort_map2 = None
-        self.camera_front = (self.camera_name == 'front')
-
-        self.initialize()
-
-    def initialize(self):
-        """Initialize the YOLO node with camera calibration and ROS connections."""
-        # # --- Determine the correct RGB topic for this camera ---
-        rgb_topic_map = {
-            'front': '/oak/rgb/image_raw',
-            'front_right': '/camera_fr/arena_camera_node/image_raw',
-            # add additional camera mappings here if needed
-        }
-        rgb_topic = rgb_topic_map.get(
-            self.camera_name,
-            f'/{self.camera_name}/rgb/image_raw'
-        )
-
-        # Initialize YOLO node
-        rospy.init_node('yolo_box_publisher')
-
-        # Create bounding box publisher
-        self.pub = rospy.Publisher('/yolo_boxes', BoundingBoxArray, queue_size=1)
-        rospy.loginfo("YOLO node initialized and waiting for messages.")
-
-        # Initialize the YOLO detector and move to GPU if available
-        self.detector = YOLO('yolov8n.pt')
-        self.detector.to('cuda')
-
-        # Subscribe to the RGB and LiDAR streams
-        self.rgb_sub = Subscriber(rgb_topic, Image)
-        self.lidar_sub = Subscriber('/ouster/points', PointCloud2)
-        self.sync = ApproximateTimeSynchronizer([
-            self.rgb_sub, self.lidar_sub
-        ], queue_size=10, slop=0.1)
-        self.sync.registerCallback(self.synchronized_callback)
-
-    def synchronized_callback(self, image_msg, lidar_msg):
-        """Process synchronized RGB and LiDAR messages to detect pedestrians."""
-        rospy.loginfo("Received synchronized RGB and LiDAR messages")
-
-        # Convert image message to OpenCV format
-        try:
-            self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8")
-        except Exception as e:
-            rospy.logerr(f"Failed to convert image: {e}")
-            self.latest_image = None
-
-        # Convert LiDAR message to numpy array
-        self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False)
-
-        # Gate guards against data not being present for both sensors:
-        if self.latest_image is None or self.latest_lidar is None:
-            return {}  # Skip
-
-        latest_image = self.latest_image.copy()
-
-        # Optionally downsample LiDAR points
-        downsample = False
-        if downsample:
-            lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1)
-        else:
-            lidar_down = self.latest_lidar.copy()
-
-        if self.camera_front == False:
-            start = time.time()
-            undistorted_img, current_K = self.undistort_image(lastest_image, self.K, self.D)
-            end = time.time()
-            # print('-------processing time undistort_image---', end -start)
-            self.current_K = current_K
-            orig_H, orig_W = undistorted_img.shape[:2]
-
-            # --- Begin modifications for three-angle detection ---
-            img_normal = undistorted_img
-        else:
-            img_normal = latest_image.copy()
-            undistorted_img = latest_image.copy()
-            orig_H, orig_W = latest_image.shape[:2]
-            self.current_K = self.K
-
-        # Run YOLO detection on the image
-        results_normal = self.detector(img_normal, conf=0.4, classes=[0])
-        combined_boxes = []
-
-        boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
-        # for box in boxes_normal:
-        #     cx, cy, w, h = box
-        #     combined_boxes.append((cx, cy, w, h, AgentActivityEnum.STANDING))
-
-        start = time.time()
-        # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference.
-        # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects)
-        pts_cam = transform_points_l2c(lidar_down, self.T_l2c)
-        projected_pts = project_points(pts_cam, self.current_K, lidar_down)
-        # What is returned:
-        # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position)
-        # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position)
-        # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame
-
-        # Create empty list of bounding boxes to fill and publish later
-        boxes = BoundingBoxArray()
-        boxes.header.frame_id = 'currentVehicleFrame'
-        boxes.header.stamp = lidar_msg.header.stamp
-
-        # Process YOLO detections
-        boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else []
-        conf_scores = np.array(results_normal[0].boxes.conf.cpu()) if len(results_normal) > 0 else []
-
-        for i, box in enumerate(boxes_normal):
-            # Skip low confidence detections
-            if conf_scores[i] < self.score_threshold:
-                continue
-
-            # Calculate the 2D bounding box in the image
-            cx, cy, w, h = box
-            left = int(cx - w / 2)
-            right = int(cx + w / 2)
-            top = int(cy - h / 2)
-            bottom = int(cy + h / 2)
-
-            # Find LiDAR points that project to this box
-            mask = ((projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) &
-                    (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom))
-            roi_pts = projected_pts[mask]
-
-            # Ignore regions with too few points
-            if roi_pts.shape[0] < 5:
-                continue
-
-            # Get the 3D points corresponding to the box
-            points_3d = roi_pts[:, 2:5]
-            points_3d = filter_depth_points(points_3d, max_depth_diff=0.8)
-            refined_cluster = refine_cluster(points_3d, np.mean(points_3d, axis=0), eps=0.15, min_samples=10)
-            refined_cluster = remove_ground_by_min_range(refined_cluster, z_range=0.1)
-
-            if refined_cluster.shape[0] < 5:
-                continue
-
-            # Create a point cloud from the filtered points
-            pcd = o3d.geometry.PointCloud()
-            pcd.points = o3d.utility.Vector3dVector(refined_cluster)
-
-            # Get an oriented bounding box
-            obb = pcd.get_oriented_bounding_box()
-            refined_center = obb.center
-            dims = tuple(obb.extent)
-            R_lidar = obb.R.copy()
-
-            # We are assuming that dims[0] is height and dims[2] is length of obb.extent
-
-            # Transform from LiDAR to vehicle coordinates
-            refined_center_hom = np.append(refined_center, 1)
-            refined_center_vehicle_hom = self.T_l2v @ refined_center_hom
-            refined_center_vehicle = refined_center_vehicle_hom[:3]
-
-            # Calculate rotation in vehicle frame
-            R_vehicle = self.T_l2v[:3, :3] @ R_lidar
-            # yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False)
-            yaw = np.arctan2(R_vehicle[1, 0], R_vehicle[0, 0])
-
-            # Add the bounding box
-            boxes = add_bounding_box(
-                boxes=boxes,
-                frame_id='currentVehicleFrame',
-                stamp=lidar_msg.header.stamp,
-                x=refined_center_vehicle[0],
-                y=refined_center_vehicle[1],
-                z=refined_center_vehicle[2],
-                l=dims[2],  # length
-                w=dims[1],  # width
-                h=dims[0],  # height
-                yaw=yaw,
-                conf_score=float(conf_scores[i]),
-                label=0  # person/pedestrian class
-            )
-
-            rospy.loginfo(f"Person detected at ({refined_center_vehicle[0]:.2f}, "
-                          f"{refined_center_vehicle[1]:.2f}, {refined_center_vehicle[2]:.2f}) "
-                          f"with score {conf_scores[i]:.2f}")
-
-        # Publish the bounding boxes
-        rospy.loginfo(f"Publishing {len(boxes.boxes)} person bounding boxes")
-        self.pub.publish(boxes)
-
-    def undistort_image(self, image, K, D):
-        """Undistort an image using the camera calibration parameters."""
-        h, w = image.shape[:2]
-        newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-
-        # Initialize undistortion maps if not already done
-        if self.undistort_map1 is None or self.undistort_map2 is None:
-            self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None,
-                                                                                   newCameraMatrix=newK, size=(w, h),
-                                                                                   m1type=cv2.CV_32FC1)
-
-        start = time.time()
-        undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST)
-        end = time.time()
-        # print('--------undistort', end-start)
-        return undistorted, newK
-
-
-if __name__ == '__main__':
-    try:
-        current_dir = os.path.dirname(__file__)
-        yaml_path = os.path.join(current_dir, '..', '..', 'knowledge', 'calibration', 'cameras.yaml')
-        node = YoloNode(yaml_path)
-        rospy.spin()
-    except rospy.ROSInterruptException:
-        pass
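Editor's note: the node deleted above is the reference for the "painted lidar" step that now lives in combined_detection.py: project each LiDAR point into the image and keep the points falling inside a YOLO 2D box. A simplified sketch of that step follows, assuming an ideal pinhole camera with no distortion; `paint_points` is a hypothetical helper, and the real `transform_points_l2c`/`project_points` in perception_utils may differ in detail.

```
import numpy as np

def paint_points(lidar_xyz: np.ndarray, T_l2c: np.ndarray, K: np.ndarray,
                 box_uv: tuple) -> np.ndarray:
    """Return the LiDAR points whose image projection falls inside a 2D box.

    lidar_xyz: (N, 3) points in the LiDAR frame
    T_l2c:     4x4 lidar-to-camera extrinsic
    K:         3x3 camera intrinsic matrix
    box_uv:    (left, top, right, bottom) pixel bounds of a YOLO detection
    """
    # Lidar frame -> camera frame via the homogeneous extrinsic
    pts_h = np.hstack([lidar_xyz, np.ones((len(lidar_xyz), 1))])
    pts_cam = (T_l2c @ pts_h.T).T[:, :3]

    # Keep only points in front of the camera
    front = pts_cam[:, 2] > 0.1
    pts_cam, lidar_xyz = pts_cam[front], lidar_xyz[front]

    # Pinhole projection: u = fx*X/Z + cx, v = fy*Y/Z + cy
    uv = (K @ (pts_cam / pts_cam[:, 2:3]).T).T[:, :2]

    left, top, right, bottom = box_uv
    mask = ((uv[:, 0] >= left) & (uv[:, 0] <= right) &
            (uv[:, 1] >= top) & (uv[:, 1] <= bottom))
    return lidar_xyz[mask]  # the 3D points "painted" by this detection
```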
diff --git a/launch/combined_detection.yaml b/launch/combined_detection.yaml
new file mode 100644
index 000000000..beec55112
--- /dev/null
+++ b/launch/combined_detection.yaml
@@ -0,0 +1,133 @@
+description: "Run the yielding trajectory planner on the real vehicle with real perception"
+mode: hardware
+vehicle_interface: gem_hardware.GEMHardwareInterface
+mission_execution: StandardExecutor
+
+# Recovery behavior after a component failure
+recovery:
+  planning:
+    trajectory_tracking : recovery.StopTrajectoryTracker
+
+# Driving behavior for the GEM vehicle. Runs real pedestrian perception and the yield planner, but does not send commands to the real vehicle.
+drive:
+  perception:
+    state_estimation : GNSSStateEstimator
+    agent_detection :
+      type: combined_detection.CombinedDetector3D
+      args:
+        # optional overrides
+        camera_name: front  # [front, front_right]
+        camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml
+        iou_threshold: 0.01  # The IoU threshold used to compare bounding boxes
+        score_threshold: 0.4  # The minimum score required for a detection
+        merge_mode: "BEV"  # Merging strategy to use: "Average", "Score", "Max", or "BEV"
+        enable_tracking: True  # True if you want to enable tracking
+        use_start_frame: False  # True to output in START frame
+        slop: 0.1  # The maximum acceptable time difference when comparing PointPillars and YOLO bounding box timestamps
+        max_bb_buffer_size: 10  # The maximum size of the buffers used to store YOLO and PointPillars BoundingBoxArrays
+        visualize: True  # Enable to publish bounding boxes to visualize in RViz
+
+    perception_normalization : StandardPerceptionNormalizer
+  planning:
+    relations_estimation:
+      type: pedestrian_yield_logic.PedestrianYielder
+      args:
+        mode: 'real'
+        params: {
+          'yielder': 'expert',     # 'expert', 'analytic', or 'simulation'
+          'planner': 'milestone',  # 'milestone', 'dt', or 'dx'
+          'desired_speed': 1.0,    # m/s, 1.5 m/s seems max speed? Feb24
+          'acceleration': 0.75     # m/s^2, 0.5 is not enough to start moving. Feb24
+        }
+    route_planning:
+      type: StaticRoutePlanner
+      args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start']
+    motion_planning: longitudinal_planning.YieldTrajectoryPlanner
+    trajectory_tracking:
+      type: pure_pursuit.PurePursuitTrajectoryTracker
+      print: False
+
+
+log:
+  # Specify the top-level folder to save the log files. Default is 'logs'
+  #folder : 'logs'
+  # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix
+  #prefix : 'fixed_route_'
+  # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix
+  #suffix : 'test3'
+  # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics.
+  ros_topics :
+  # Specify options to pass to rosbag record. Default is no options.
+  #rosbag_options : '--split --size=1024'
+  # If True, then record all readings / commands of the vehicle interface. Default False
+  vehicle_interface : True
+  # Specify which components to record to behavior.json. Default records nothing
+  components : ['state_estimation','agent_detection','motion_planning']
+  # Specify which components of state to record to state.json. Default records nothing
+  #state: ['all']
+  # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate
+  #state_rate: 10
+  # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. Default False
+  #auto_plot : True
+replay:  # Add items here to set certain topics / inputs to be replayed from logs
+  # Specify which log folder to replay from
+  log:
+  ros_topics : []
+  components : []
+
+# Usually can keep this constant
+computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml"
+
+variants:
+  detector_only:
+    run:
+      description: "Run the pedestrian detection code"
+      drive:
+        planning:
+          trajectory_tracking:
+  real_sim:
+    run:
+      description: "Run the pedestrian detection code with real detection and fake simulation"
+      mode: hardware
+      vehicle_interface:
+        type: gem_mixed.GEMRealSensorsWithSimMotionInterface
+        args:
+          scene: !relative_path '../scenes/xyhead_demo.yaml'
+      mission_execution: StandardExecutor
+      require_engaged: False
+      visualization: !include "klampt_visualization.yaml"
+      drive:
+        perception:
+          state_estimation : OmniscientStateEstimator
+        planning:
+          relations_estimation:
+            type: pedestrian_yield_logic.PedestrianYielder
+            args:
+              mode: 'sim'
+
+  fake_real:
+    run:
+      description: "Run the yielding trajectory planner on the real vehicle with faked perception"
+      drive:
+        perception:
+          agent_detection : cone_detection.FakeCombinedDetector2D
+
+  fake_sim:
+    run:
+      description: "Run the yielding trajectory planner in simulation with faked perception"
+      mode: simulation
+      vehicle_interface:
+        type: gem_simulator.GEMDoubleIntegratorSimulationInterface
+        args:
+          scene: !relative_path '../scenes/xyhead_demo.yaml'
+      visualization: !include "klampt_visualization.yaml"
+      drive:
+        perception:
+          # agent_detection : pedestrian_detection.FakePedestrianDetector2D
+          agent_detection : OmniscientAgentDetector  # this option reads agents from the simulator
+          state_estimation : OmniscientStateEstimator
+        planning:
+          relations_estimation:
+            type: pedestrian_yield_logic.PedestrianYielder
+            args:
+              mode: 'sim'
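Editor's note: the launch file above selects `merge_mode: "BEV"` with a very permissive `iou_threshold: 0.01`, i.e. YOLO and PointPillars boxes are associated by their bird's-eye-view overlap. A minimal sketch of that association test follows, assuming axis-aligned footprints given as (x, y, l, w); `bev_iou` and that tuple convention are illustrative assumptions for this sketch, not the exact representation used in combined_detection.py, which may intersect yaw-rotated footprints instead.

```
def bev_iou(box_a, box_b) -> float:
    """Axis-aligned bird's-eye-view IoU of two boxes given as (x, y, l, w).

    Illustrative only: ignores yaw, whereas an oriented-box implementation
    would intersect rotated footprints.
    """
    ax, ay, al, aw = box_a
    bx, by, bl, bw = box_b
    # Overlap of the two footprints along each ground-plane axis
    dx = max(0.0, min(ax + al / 2, bx + bl / 2) - max(ax - al / 2, bx - bl / 2))
    dy = max(0.0, min(ay + aw / 2, by + bw / 2) - max(ay - aw / 2, by - bw / 2))
    inter = dx * dy
    union = al * aw + bl * bw - inter
    return inter / union if union > 0 else 0.0

# Two pedestrian-sized footprints offset by 0.2 m: IoU well above 0.01,
# so with iou_threshold: 0.01 they would be treated as the same detection.
print(bev_iou((5.0, 1.0, 0.6, 0.6), (5.2, 1.0, 0.6, 0.6)))  # ~0.5
```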