From 09dc046d7ca6662666f4b9efe51eb6c9d27888ef Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:32:20 -0500
Subject: [PATCH 1/5] Create README.md

---
 GEMstack/knowledge/detection/README.md | 37 ++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 GEMstack/knowledge/detection/README.md

diff --git a/GEMstack/knowledge/detection/README.md b/GEMstack/knowledge/detection/README.md
new file mode 100644
index 00000000..fdb9bf86
--- /dev/null
+++ b/GEMstack/knowledge/detection/README.md
@@ -0,0 +1,37 @@
+# Autonomous Vehicles Final Project: Perception Models

+Link to the Google Drive folder where all models are stored:
+https://drive.google.com/drive/folders/11oXlVTWNcYcLQbluM8bh6MY80lCBoJ0d?usp=drive_link
+
+## Contributed Models
+
+### Cone Detection – Bounding Box Models
+
+1. **Cone_Detector_30m_Under_YOLOv8.pt**
+   - Same as `cone.pt` in this folder; this is our default model and a good fit for most test scenarios.
+   - Optimized for detecting cones within 30 meters, which is similar to the LiDAR's effective range for cone detection.
+   - Achieves 100% recall and precision with tightly fitting bounding boxes for cones within 25 m.
+   - Limitation: fails to consistently detect cones beyond 30 meters.
+
+2. **Cone_Detector_30m_Plus_YOLOv8.pt**
+   - Designed for cone detection beyond 30 meters, with high precision, recall, and mAP.
+   - Overfitting was observed due to redundancy in the training data, so it is less reliable at closer range.
+   - Serves as a strong baseline for long-range cone detection tasks.
+
+### Parking Space Detection – Lane-Based Segmentation Models
+
+Models:
+1. **parking_space_YOLO11n-seg.pt**
+2. **parking_space_YOLO11s-seg.pt**
+3. **parking_space_yolov8n-seg.pt**
+
+These models perform well on the original parking lot layout, accurately segmenting lanes and identifying available spaces. However, they do not generalize well to newly modified parking lots with different lane markings. To improve generalization, additional labeled images from the updated environment are required.
+
+> **Note:** While **YOLO11s-seg** currently shows the lowest performance among the three models, its larger architecture offers the most potential for improvement. With more diverse training data and fine-tuning, it could outperform both YOLOv8n-seg and YOLO11n-seg in accuracy and generalization.
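+
+As a quick sanity check, any of these checkpoints can be loaded directly with the `ultralytics` API. The snippet below is only an illustrative sketch: the sample image name is a placeholder and the confidence threshold is a suggested starting point, not a tuned value.
+
+```python
+from ultralytics import YOLO
+
+# Load one of the contributed segmentation checkpoints (path assumes the
+# model has been downloaded into GEMstack/knowledge/detection/).
+model = YOLO("GEMstack/knowledge/detection/parking_space_YOLO11n-seg.pt")
+
+# "parking_lot_sample.png" is a placeholder image; conf=0.25 is only a
+# suggested starting threshold.
+results = model("parking_lot_sample.png", conf=0.25)
+
+print(results[0].boxes.xywh)  # bounding boxes of detected spaces/lanes
+print(results[0].masks)       # segmentation masks (None if nothing detected)
+```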
+
+| Model       | mAP@50 | mAP@50–95 | Benefits & Tradeoffs                               |
+|-------------|--------|-----------|----------------------------------------------------|
+| Base model  | N/A    | N/A       | Placeholder for comparison; not evaluated          |
+| YOLOv8n-seg | 0.886  | 0.634     | Fast, modern YOLOv8 variant; efficient and compact |
+| YOLO11n-seg | 0.905  | 0.703     | Very fast and lightweight; great for real-time use |
+| YOLO11s-seg | 0.899  | 0.685     | Balanced option with good accuracy and speed       |

From f9ceb76d0a1a5c15eb663499dc13b01fe02526a2 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:34:40 -0500
Subject: [PATCH 2/5] Update cone_detection.py

---
 GEMstack/onboard/perception/cone_detection.py | 346 +++++++++---------
 1 file changed, 174 insertions(+), 172 deletions(-)

diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py
index ab83d03d..69d1c0f7 100644
--- a/GEMstack/onboard/perception/cone_detection.py
+++ b/GEMstack/onboard/perception/cone_detection.py
@@ -1,8 +1,8 @@
-from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \
+from ...state import VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \
     ObstacleStateEnum
 from ..interface.gem import GEMInterface
 from ..component import Component
-from .perception_utils import *
+from GEMstack.onboard.perception.utils.perception_utils import *
 from ultralytics import YOLO
 import cv2
 from typing import Dict
@@ -13,20 +13,15 @@
 from sensor_msgs.msg import PointCloud2, Image
 from message_filters import Subscriber, ApproximateTimeSynchronizer
 from cv_bridge import CvBridge
-import time
 import os
 import yaml


 class ConeDetector3D(Component):
     """
-    Detects cones by fusing YOLO 2D detections with LiDAR point cloud data.
-
-    Tracking is optional: set `enable_tracking=False` to disable persistent tracking
-    and return only detections from the current frame.
-
-    Supports multiple cameras; each camera’s intrinsics and extrinsics are
-    loaded from a single YAML calibration file via plain PyYAML.
+    Detects traffic cones by fusing YOLO 2D detections with LiDAR point cloud data.
+    Tracking is optional: disable persistent tracking via `enable_tracking`.
+    Supports multiple cameras; each camera’s intrinsics/extrinsics are loaded via YAML.
""" def __init__( @@ -40,20 +35,37 @@ def __init__( save_data: bool = True, orientation: bool = True, use_start_frame: bool = True, + # Processing parameters with defaults + model_path: str = 'GEMstack/knowledge/detection/cone.pt', + downsample: bool = False, + conf_normal: float = 0.35, + conf_left: float = 0.15, + conf_right: float = 0.15, + max_depth: float = 40, + z_range: float = 0.08, + max_depth_diff: float = 0.5, + alpha: float = 0.1, + expiring_time: float = 20.0, + update_rate: int = 8, **kwargs ): - # Core interfaces and state + # --- Core interfaces --- self.vehicle_interface = vehicle_interface + # Containers for current and tracked obstacles self.current_obstacles = {} self.tracked_obstacles = {} - self.cone_counter = 0 + self.cone_counter = 0 # unique ID counter + self.update_rate = update_rate + # Latest sensor data placeholders self.latest_image = None self.latest_lidar = None self.bridge = CvBridge() + + # Start pose & time references self.start_pose_abs = None self.start_time = None - # Config flags + # --- Config flags from init args --- self.camera_name = camera_name self.enable_tracking = enable_tracking self.visualize_2d = visualize_2d @@ -61,51 +73,56 @@ def __init__( self.save_data = save_data self.orientation = orientation self.use_start_frame = use_start_frame - - # 2) Load camera intrinsics/extrinsics from the supplied YAML + # Processing parameters + self.downsample = downsample + self.conf_normal = conf_normal + self.conf_left = conf_left + self.conf_right = conf_right + self.max_depth = max_depth + self.z_range = z_range + self.max_depth_diff = max_depth_diff + self.alpha = alpha + self.expiring_time = expiring_time + self.model_path = model_path + # --- Load camera calibration (intrinsics/extrinsics) --- with open(camera_calib_file, 'r') as f: calib = yaml.safe_load(f) - - # Expect structure: - # cameras: - # front: - # K: [[...], [...], [...]] - # D: [...] 
- # T_l2c: [[...], ..., [...]] cam_cfg = calib['cameras'][camera_name] - self.K = np.array(cam_cfg['K']) - self.D = np.array(cam_cfg['D']) - self.T_l2c = np.array(cam_cfg['T_l2c']) - self.T_l2v = np.array(cam_cfg['T_l2v']) - - # Derived transforms + self.K = np.array(cam_cfg['K']) # Camera intrinsic matrix + self.D = np.array(cam_cfg['D']) # Distortion coefficients + self.T_l2c = np.array(cam_cfg['T_l2c']) # LiDAR-to-camera transform + self.T_l2v = np.array(cam_cfg['T_l2v']) # LiDAR-to-vehicle transform + # Placeholders for undistortion maps self.undistort_map1 = None self.undistort_map2 = None self.camera_front = (camera_name == 'front') def rate(self) -> float: - return 8 + # Desired update frequency (Hz) + return self.update_rate def state_inputs(self) -> list: + # Inputs required: vehicle state return ['vehicle'] def state_outputs(self) -> list: + # Outputs provided: detected obstacles return ['obstacles'] def initialize(self): - # --- Determine the correct RGB topic for this camera --- + # Determine the correct ROS topic for the camera rgb_topic_map = { 'front': '/oak/rgb/image_raw', 'front_right': '/camera_fr/arena_camera_node/image_raw', - # add additional camera mappings here if needed + 'front_left': '/camera_fl/arena_camera_node/image_raw', } rgb_topic = rgb_topic_map.get( self.camera_name, f'/{self.camera_name}/rgb/image_raw' ) - # Subscribe to the RGB and LiDAR streams + # Subscribe to synchronized RGB and LiDAR streams self.rgb_sub = Subscriber(rgb_topic, Image) self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ @@ -113,119 +130,114 @@ def initialize(self): ], queue_size=500, slop=0.03) self.sync.registerCallback(self.synchronized_callback) - # Initialize the YOLO detector - self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') + # Initialize the YOLO detector model on GPU + self.detector = YOLO(self.model_path) self.detector.to('cuda') + + # Precompute camera-to-lidar transform and camera origin self.T_c2l = np.linalg.inv(self.T_l2c) self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): - step1 = time.time() + # Callback to convert incoming ROS messages to numpy formats try: self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") except Exception as e: rospy.logerr("Failed to convert image: {}".format(e)) self.latest_image = None - step2 = time.time() + # Convert PointCloud2 to numpy array self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) - step3 = time.time() - # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) def undistort_image(self, image, K, D): + # Remove lens distortion and rectify the image h, w = image.shape[:2] newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) if self.undistort_map1 is None or self.undistort_map2 is None: - self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, - newCameraMatrix=newK, size=(w, h), - m1type=cv2.CV_32FC1) - - start = time.time() - undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) - end = time.time() - # print('--------undistort', end-start) + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap( + K, D, R=None, newCameraMatrix=newK, size=(w, h), m1type=cv2.CV_32FC1 + ) + undistorted = cv2.remap( + image, self.undistort_map1, self.undistort_map2, + interpolation=cv2.INTER_NEAREST + ) return undistorted, newK def update(self, vehicle: 
VehicleState) -> Dict[str, Obstacle]: - downsample = False - # Gate guards against data not being present for both sensors: + # Return empty if data not yet received from both sensors if self.latest_image is None or self.latest_lidar is None: return {} - latest_image = self.latest_image.copy() - # Set up current time variables - start = time.time() + latest_image = self.latest_image.copy() current_time = self.vehicle_interface.time() - if downsample: + # Optionally downsample point cloud + if self.downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() + # Initialize start time if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time - # Ensure data/ exists and build timestamp + # Optionally save raw sensor data if self.save_data: self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) - if self.camera_front == False: - start = time.time() + # Undistort if not using front camera + if not self.camera_front: undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) - end = time.time() - # print('-------processing time undistort_image---', end -start) - self.current_K = current_K - orig_H, orig_W = undistorted_img.shape[:2] - - # --- Begin modifications for three-angle detection --- img_normal = undistorted_img else: img_normal = latest_image.copy() undistorted_img = latest_image.copy() - orig_H, orig_W = latest_image.shape[:2] - self.current_K = self.K - # print(self.K) - # print(self.T_l2c) - results_normal = self.detector(img_normal, conf=0.35, classes=[0]) + current_K = self.K + + # 2D detection on normal orientation + results_normal = self.detector(img_normal, conf=self.conf_normal, classes=[0]) + combined_boxes = [] if not self.enable_tracking: + # Reset ID counter if no tracking self.cone_counter = 0 + + # Optionally run detectors on rotated images for orientation if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.15, classes=[0]) - results_right = self.detector(img_right, conf=0.15, classes=[0]) + results_left = self.detector(img_left, conf=self.conf_left, classes=[0]) + results_right = self.detector(img_right, conf=self.conf_right, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + # Transform rotated boxes back to normal image coords for box in boxes_left: cx, cy, w, h = box - new_cx = orig_W - 1 - cy + new_cx = undistorted_img.shape[1] - 1 - cy new_cy = cx - new_w = h # Swap width and height. - new_h = w - combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.RIGHT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box new_cx = cy - new_cy = orig_H - 1 - cx - new_w = h # Swap width and height. 
- new_h = w - combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.LEFT)) + new_cy = undistorted_img.shape[0] - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) + # Add normal orientation detections boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: cx, cy, w, h = box combined_boxes.append((cx, cy, w, h, ObstacleStateEnum.STANDING)) - # Visualize the received images in 2D with their corresponding labels - # It draws rectangles and labels on the images: - if getattr(self, 'visualize_2d', False): + # 2D visualization if enabled + if self.visualize_2d: + # Draw bounding boxes and state labels on the image for (cx, cy, w, h, state) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) + color = (255, 255, 255) if state == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" @@ -235,60 +247,53 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: elif state == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" - else: - color = (255, 255, 255) - label = "UNKNOWN" cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) cv2.imshow("Detection - Cone 2D", undistorted_img) - start = time.time() - # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. - # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) + # Transform LiDAR points to camera frame and project to image pts_cam = transform_points_l2c(lidar_down, self.T_l2c) - projected_pts = project_points(pts_cam, self.current_K, lidar_down) - # What is returned: - # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) - # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) - # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame - - end = time.time() - # print('-------processing time1---', end -start) + projected_pts = project_points(pts_cam, current_K, lidar_down) obstacles = {} - - for i, box_info in enumerate(combined_boxes): + # Fuse 2D boxes with 3D points + for box_info in combined_boxes: cx, cy, w, h, state = box_info - # print(cx, cy, w, h) + # Define ROI in image coords left = int(cx - w / 1.6) right = int(cx + w / 1.6) top = int(cy - h / 2) bottom = int(cy + h / 2) - mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ - (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + mask = ( + (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + ) roi_pts = projected_pts[mask] - # print(roi_pts) if roi_pts.shape[0] < 5: continue + # Extract 3D points and apply depth filters points_3d = roi_pts[:, 2:5] - - points_3d = filter_points_within_threshold(points_3d, 40) - points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) + points_3d = filter_points_within_threshold(points_3d, self.max_depth) + points_3d = remove_ground_by_min_range(points_3d, z_range=self.z_range) + points_3d = filter_depth_points(points_3d, max_depth_diff=self.max_depth_diff) if self.use_cyl_roi: + # Optionally refine with cylindrical ROI global_filtered = 
filter_points_within_threshold(lidar_down, 30) - roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) + roi_cyl = cylindrical_roi( + global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2 + ) refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) else: refined_cluster = points_3d.copy() - # end1 = time.time() + if refined_cluster.shape[0] < 4: continue + # Compute oriented bounding box of the cluster pcd = o3d.geometry.PointCloud() pcd.points = o3d.utility.Vector3dVector(refined_cluster) obb = pcd.get_oriented_bounding_box() @@ -296,14 +301,17 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: dims = tuple(obb.extent) R_lidar = obb.R.copy() + # Transform center to vehicle frame refined_center_hom = np.append(refined_center, 1) refined_center_vehicle_hom = self.T_l2v @ refined_center_hom refined_center_vehicle = refined_center_vehicle_hom[:3] + # Compute orientation in vehicle frame R_vehicle = self.T_l2v[:3, :3] @ R_lidar yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) refined_center = refined_center_vehicle + # Optionally convert to start frame if self.use_start_frame: if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose @@ -312,8 +320,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: vehicle.pose, self.start_pose_abs ) - T_vehicle_to_start = vehicle_start_pose.transform() - xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + xp, yp, zp = (vehicle_start_pose.transform() @ np.append(refined_center, 1))[:3] out_frame = ObjectFrameEnum.START else: xp, yp, zp = refined_center @@ -326,7 +333,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: frame=out_frame ) - # --- Optional tracking --- + # Tracking: match to existing cone or add new if self.enable_tracking: existing_id = match_existing_cone( np.array([new_pose.x, new_pose.y, new_pose.z]), @@ -335,39 +342,38 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: distance_threshold=2 ) if existing_id is not None: + # Smooth pose using exponential moving average old_state = self.tracked_obstacles[existing_id] - if vehicle.v < 100: - alpha = 0.1 - avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x - avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y - avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z - avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw - avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch - avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll - - updated_pose = ObjectPose( - t=new_pose.t, - x=avg_x, - y=avg_y, - z=avg_z, - yaw=avg_yaw, - pitch=avg_pitch, - roll=avg_roll, - frame=new_pose.frame - ) - updated_obstacle = Obstacle( - pose=updated_pose, - dimensions=dims, - outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, - state=state, - collidable=True - ) - else: - updated_obstacle = old_state + alpha = self.alpha + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + 
z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_obstacle = Obstacle( + pose=updated_pose, + dimensions=dims, + outline=None, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, + collidable=True + ) obstacles[existing_id] = updated_obstacle self.tracked_obstacles[existing_id] = updated_obstacle else: + # Create new tracked obstacle obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 new_obstacle = Obstacle( @@ -381,6 +387,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle else: + # No tracking: return only current frame detections obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 new_obstacle = Obstacle( @@ -389,15 +396,20 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, - collidable = True + collidable=True ) obstacles[obstacle_id] = new_obstacle - self.current_obstacles = obstacles + # Clean up stale tracked cones + stale_ids = [oid for oid, obs in self.tracked_obstacles.items() + if current_time - obs.pose.t > self.expiring_time] + for oid in stale_ids: + rospy.loginfo(f"Removing stale obstacle: {oid}\n") + del self.tracked_obstacles[oid] - # If tracking not enabled, return only current frame detections - if not self.enable_tracking: - for obstacle_id, obstacle in self.current_obstacles.items(): + # Log and return tracked obstacles if tracking enabled + if self.enable_tracking: + for obstacle_id, obstacle in self.tracked_obstacles.items(): p = obstacle.pose rospy.loginfo( f"Cone ID: {obstacle_id}\n" @@ -405,17 +417,10 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" f"state:{obstacle.state}" ) - end = time.time() - # print('-------processing time', end -start) - return self.current_obstacles - - stale_ids = [obstacle_id for obstacle_id, obstacle in self.tracked_obstacles.items() - if current_time - obstacle.pose.t > 20.0] - for obstacle_id in stale_ids: - rospy.loginfo(f"Removing stale obstacle: {obstacle_id}\n") - del self.tracked_obstacles[obstacle_id] - if self.enable_tracking: - for obstacle_id, obstacle in self.tracked_obstacles.items(): + return self.tracked_obstacles + else: + # Log and return detections for current frame only + for obstacle_id, obstacle in obstacles.items(): p = obstacle.pose rospy.loginfo( f"Cone ID: {obstacle_id}\n" @@ -423,18 +428,14 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" f"state:{obstacle.state}" ) - end = time.time() - # print('-------processing time', end -start) - return self.tracked_obstacles + return obstacles def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + # Save raw image, LiDAR, and vehicle state to disk for debugging os.makedirs("data", exist_ok=True) tstamp = int(self.vehicle_interface.time() * 1000) - # 1) Dump raw image cv2.imwrite(f"data/{tstamp}_image.png", latest_image) - # 2) Dump raw LiDAR np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) - # 3) Write BEFORE_TRANSFORM with open(f"data/{tstamp}_vehstate.txt", "w") as f: vp = vehicle.pose f.write( @@ -442,7 +443,6 @@ def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" ) - # 
Compute vehicle_start_pose in either START or CURRENT if self.use_start_frame: if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose @@ -458,21 +458,21 @@ def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: with open(f"data/{tstamp}_vehstate.txt", "a") as f: f.write( f"AFTER_TRANSFORM " - f"x={vehicle_start_pose.x:.3f}, " - f"y={vehicle_start_pose.y:.3f}, " - f"z={vehicle_start_pose.z:.3f}, " - f"yaw={vehicle_start_pose.yaw:.2f}, " - f"pitch={vehicle_start_pose.pitch:.2f}, " - f"roll={vehicle_start_pose.roll:.2f}, " - f"frame={mode}\n" + f"x={vehicle_start_pose.x:.3f}, y={vehicle_start_pose.y:.3f}, z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, frame={mode}\n" ) - # ----- Fake Cone Detector 2D (for Testing Purposes) ----- +# ----- Fake Cone Detector 2D (for Testing Purposes) ----- class FakConeDetector(Component): + """ + Simulates cone detections at preset time intervals for testing pipeline. + """ def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface + # Simulated detection windows (start, end) self.times = [(5.0, 20.0), (30.0, 35.0)] self.t_start = None @@ -491,7 +491,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: t = self.vehicle_interface.time() - self.t_start res = {} for time_range in self.times: - if t >= time_range[0] and t <= time_range[1]: + if time_range[0] <= t <= time_range[1]: res['cone0'] = box_to_fake_obstacle((0, 0, 0, 0)) rospy.loginfo("Detected a Cone (simulated)") return res @@ -499,11 +499,13 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: def box_to_fake_obstacle(box): x, y, w, h = box - pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, + yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return Obstacle(pose=pose, dimensions=dims, outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING, collidable=True) + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=ObstacleStateEnum.STANDING, collidable=True) if __name__ == '__main__': - pass \ No newline at end of file + pass From 63eab6eda74d4b5062750ec466ca5fe4ba0b7c7a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 16 May 2025 09:35:24 -0500 Subject: [PATCH 3/5] Update cone_detection.yaml --- launch/cone_detection.yaml | 51 ++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index 6b2d0a82..c0159f22 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -15,14 +15,27 @@ drive: obstacle_detection : type: cone_detection.ConeDetector3D args: - camera_name: front #[front, front_right] - camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - enable_tracking: True # True if you want to enable tracking - visualize_2d: False # True to see 2D detection visualization - use_cyl_roi: False # True to use a cylinder ROI - save_data: False # True to save sensor input data - orientation: True # True to detect flipped cones - use_start_frame: True # True to output in START frame + camera_name: front # [front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/gem_e4_perception_cameras.yaml + enable_tracking: True # Enable persistent cone tracking + 
visualize_2d: False # Display 2D bounding‐box overlays if True + save_data: False # Save raw images & LiDAR for offline debugging + orientation: True # Also detect cones when image is rotated + use_start_frame: True # Report cone poses in the START frame + # If you want to change a detection model, edit the path here + model_path: ./GEMstack/knowledge/detection/cone.pt + # In most cases you don’t need to change the settings below + # they’ve been fine-tuned based on data from the vertical teams. + use_cyl_roi: False # Apply an additional cylindrical ROI filter. Useful when calibration is not reliable + downsample: False # Voxel-downsample LiDAR points for faster processing + conf_normal: 0.35 # YOLO confidence threshold for upright cones + conf_left: 0.10 # YOLO confidence threshold for left-rotated cones + conf_right: 0.10 # YOLO confidence threshold for right-rotated cones + max_depth: 40 # Maximum LiDAR range (meters) to include in ROI + z_range: 0.08 # Minimum height above ground to remove ground plane + max_depth_diff: 0.50 # Maximum local depth variance to cone pointcloud cluster + alpha: 0.10 # EMA smoothing weight for updating tracked poses + expiring_time: 20 # Time (seconds) after which a tracked cone is removed perception_normalization : StandardPerceptionNormalizer planning: @@ -30,20 +43,20 @@ drive: log: # Specify the top-level folder to save the log files. Default is 'logs' - #folder : 'logs' + #folder : 'logs' # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix - #prefix : 'fixed_route_' + #prefix : 'fixed_route_' # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix - #suffix : 'test3' + #suffix : 'test3' # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. - ros_topics : + ros_topics : # Specify options to pass to rosbag record. Default is no options. - #rosbag_options : '--split --size=1024' + #rosbag_options : '--split --size=1024' # If True, then record all readings / commands of the vehicle interface. Default False vehicle_interface : True # Specify which components to record to behavior.json. Default records nothing components : ['state_estimation','obstacle_detection','motion_planning'] - # Specify which components of state to record to state.json. Default records nothing + # Specify which components of state to record to state.json. Default records nothing #state: ['all'] # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate
  #state_rate: 10
@@ -51,7 +64,7 @@ log:
   #auto_plot : True
 replay: # Add items here to set certain topics / inputs to be replayed from logs
   # Specify which log folder to replay from
-  log:
+  log:
   ros_topics : []
   components : []
@@ -76,11 +89,11 @@ variants:
       mission_execution: StandardExecutor
       require_engaged: False
       visualization: !include "klampt_visualization.yaml"
-      drive:
+      drive:
        perception:
          state_estimation : OmniscientStateEstimator
        planning:
-         relations_estimation:
+         relations_estimation:
           type: pedestrian_yield_logic.PedestrianYielder
           args:
             mode: 'sim'
@@ -101,13 +114,13 @@ variants:
       args:
         scene: !relative_path '../scenes/xyhead_demo.yaml'
       visualization: !include "klampt_visualization.yaml"
-      drive:
+      drive:
        perception:
          # agent_detection : pedestrian_detection.FakePedestrianDetector2D
          agent_detection : OmniscientAgentDetector #this option reads agents from the simulator
          state_estimation : OmniscientStateEstimator
        planning:
-         relations_estimation:
+         relations_estimation:
           type: pedestrian_yield_logic.PedestrianYielder
           args:
             mode: 'sim'

From b9a69f00e7f47c2c71309f1f61237b910dc402f6 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:36:36 -0500
Subject: [PATCH 4/5] Create pedestrian_detection.py

---
 .../perception/pedestrian_detection.py       | 390 ++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 GEMstack/onboard/perception/pedestrian_detection.py

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
new file mode 100644
index 00000000..cbad22e3
--- /dev/null
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -0,0 +1,390 @@
+from ...state import VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
+from ..interface.gem import GEMInterface
+from ..component import Component
+from GEMstack.onboard.perception.utils.perception_utils import * # If you want to alias functions for clarity, do so in perception_utils
+from ultralytics import YOLO
+import cv2
+from typing import Dict
+import open3d as o3d
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+import rospy
+from sensor_msgs.msg import PointCloud2, Image
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+from cv_bridge import CvBridge
+import os
+import yaml
+
+
+class PedestrianDetector3D(Component):
+    """
+    Detects pedestrians by fusing YOLO 2D detections with LiDAR point cloud data.
+    Tracking is optional: `enable_tracking=False` returns only current-frame detections.
+    Calculates and outputs each pedestrian's velocity and yaw rate.
+ """ + + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + save_data: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.pedestrian_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Configuration + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.use_start_frame = use_start_frame + + # 2) Load camera intrinsics/extrinsics from YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + self.T_l2v = np.array(cam_cfg['T_l2v']) # LiDAR-to-vehicle transform + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name == 'front') + + def rate(self) -> float: + return 8.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + # Subscribe to RGB & LiDAR streams + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + } + rgb_topic = rgb_topic_map.get(self.camera_name, + f'/{self.camera_name}/rgb/image_raw') + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer( + [self.rgb_sub, self.lidar_sub], queue_size=500, slop=0.025 + ) + self.sync.registerCallback(self.synchronized_callback) + + # Initialize YOLO pedestrian detection model (COCO class 0) + self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') + self.detector.to('cuda') + + # Compute camera-to-LiDAR transform + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr(f"Failed to convert image: {e}") + self.latest_image = None + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap( + K, D, None, newK, (w, h), cv2.CV_32FC1 + ) + undistorted = cv2.remap( + image, self.undistort_map1, self.undistort_map2, + interpolation=cv2.INTER_NEAREST + ) + return undistorted, newK + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + # Ensure both image and LiDAR data are available + if self.latest_image is None or self.latest_lidar is None: + return {} + + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + + # Optionally save raw sensor data + if self.save_data: + self.save_sensor_data(vehicle, self.latest_image) + + # Undistort image if needed + if not self.camera_front: + img, self.current_K = self.undistort_image( + self.latest_image, self.K, 
self.D) + else: + img = self.latest_image.copy() + self.current_K = self.K + + # Perform 2D detection + results = self.detector(img, conf=0.35, classes=[0]) + boxes2d = (np.array(results[0].boxes.xywh.cpu()) + if len(results) > 0 else []) + + # Project LiDAR points into image plane + lidar_pts = self.latest_lidar.copy() + pts_cam = transform_points_l2c(lidar_pts, self.T_l2c) + projected = project_points(pts_cam, self.current_K, lidar_pts) + + agents: Dict[str, AgentState] = {} + for (cx, cy, w, h) in boxes2d: + # print(cx, cy, w, h) + # Define ROI in image for each detection + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = ((projected[:, 0] >= left) & (projected[:, 0] <= right) & + (projected[:, 1] >= top) & (projected[:, 1] <= bottom)) + roi = projected[mask] + if roi.shape[0] < 5: + continue + + # Filter point cloud + pts3d = roi[:, 2:5] + pts3d = filter_points_within_threshold(pts3d, 40) + pts3d = remove_ground_by_min_range(pts3d, z_range=0.08) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.5) + + if self.use_cyl_roi: + glob = filter_points_within_threshold(lidar_pts, 30) + cyl = cylindrical_roi(glob, np.mean(pts3d, axis=0), radius=0.5, height=2.0) + pts3d = remove_ground_by_min_range(cyl, z_range=0.01) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.3) + + if pts3d.shape[0] < 4: + continue + + # Fit oriented bounding box + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts3d) + obb = pcd.get_oriented_bounding_box() + center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R + + # Transform to vehicle frame + c_hom = np.append(center, 1) + veh_hom = self.T_l2v @ c_hom + veh_center = veh_hom[:3] + + # Compute orientation + R_veh = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_veh).as_euler('zyx', degrees=False) + + # Convert to START or CURRENT frame + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs) + T_vs = start_pose.transform() + xp, yp, zp = (T_vs @ np.append(veh_center, 1))[:3] + frame = ObjectFrameEnum.START + else: + xp, yp, zp = veh_center + frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=frame + ) + + # Matching and tracking + if self.enable_tracking: + existing = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, self.tracked_agents, + distance_threshold=1.0 + ) + if existing is not None: + old = self.tracked_agents[existing] + dt = max(new_pose.t - old.pose.t, 1e-3) + # Instantaneous velocity + vx = (new_pose.x - old.pose.x) / dt + vy = (new_pose.y - old.pose.y) / dt + vz = (new_pose.z - old.pose.z) / dt + # Yaw rate + yaw_rate = (new_pose.yaw - old.pose.yaw) / dt + speed = np.linalg.norm([vx, vy, vz]) + activity = (AgentActivityEnum.MOVING + if speed > 0.1 else AgentActivityEnum.STOPPED) + + updated = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=activity, + velocity=(vx, vy, vz), + yaw_rate=yaw_rate + ) + agents[existing] = updated + self.tracked_agents[existing] = updated + + else: + aid = f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + 
yaw_rate=0 + ) + agents[aid] = new_agent + self.tracked_agents[aid] = new_agent + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + else: + aid = f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + agents[aid] = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + self.current_agents = agents + + # Remove stale tracked agents + if self.enable_tracking: + stale = [ + aid for aid, a in self.tracked_agents.items() + if current_time - a.pose.t > 10.0 + ] + for aid in stale: + del self.tracked_agents[aid] + return self.tracked_agents + + return self.current_agents + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + t = int(self.vehicle_interface.time() * 1000) + cv2.imwrite(f"data/{t}_image.png", latest_image) + np.savez(f"data/{t}_lidar.npz", lidar=self.latest_lidar) + # Write BEFORE_TRANSFORM state + with open(f"data/{t}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute start or current frame state + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{t}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + +# ----- Fake Cone Detector 2D (for Testing Purposes) ----- +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for time_range in self.times: + if time_range[0] <= t <= time_range[1]: + res['Pedestrian0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Pedestrian (simulated)") + return res + + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose( + t=0, x=x + w / 2, y=y + h / 2, z=0, + yaw=0, pitch=0, roll=0, + frame=ObjectFrameEnum.CURRENT + ) + dims = (w, h, 0) + return AgentState( + pose=pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + +if __name__ == '__main__': + pass From c0bf22f594c2e160ea0bd53e5b27b481f407b9fb Mon Sep 17 00:00:00 2001 From: Tianyu Ren 
<142737618+Ren990420@users.noreply.github.com> Date: Fri, 16 May 2025 09:37:16 -0500 Subject: [PATCH 5/5] Create pedestrian_detection.yaml --- launch/pedestrian_detection.yaml | 132 +++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 launch/pedestrian_detection.yaml diff --git a/launch/pedestrian_detection.yaml b/launch/pedestrian_detection.yaml new file mode 100644 index 00000000..f491a6e9 --- /dev/null +++ b/launch/pedestrian_detection.yaml @@ -0,0 +1,132 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. +drive: + + perception: + state_estimation: GNSSStateEstimator + agent_detection: + type: pedestrian_detection.PedestrianDetector3D + args: + camera_name: front #[front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/gem_e4_perception_cameras.yaml + + enable_tracking: True # True if you want to enable tracking + visualize_2d: False # True to see 2D detection visualization + use_cyl_roi: False # True to use a cylinder ROI + save_data: False # True to save sensor input data + orientation: False # True to detect flipped cones + use_start_frame: True # True to output in START frame + + perception_normalization: StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. 
Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : pedestrian_detection.FakePedestrianDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim'