From 09dc046d7ca6662666f4b9efe51eb6c9d27888ef Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:32:20 -0500
Subject: [PATCH 1/5] Create README.md

---
 GEMstack/knowledge/detection/README.md | 37 ++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 GEMstack/knowledge/detection/README.md

diff --git a/GEMstack/knowledge/detection/README.md b/GEMstack/knowledge/detection/README.md
new file mode 100644
index 00000000..fdb9bf86
--- /dev/null
+++ b/GEMstack/knowledge/detection/README.md
@@ -0,0 +1,37 @@
+# Autonomous Vehicles Final Project: Perception Models

+Link to the Google Drive folder where all models are stored:
+https://drive.google.com/drive/folders/11oXlVTWNcYcLQbluM8bh6MY80lCBoJ0d?usp=drive_link
+
+## Contributed Models
+
+### Cone Detection – Bounding Box Models
+
+1. **Cone_Detector_30m_Under_YOLOv8.pt**
+   - Same as `cone.pt` in this folder; this is our default model and a good fit for most test scenarios.
+   - Optimized for detecting cones within 30 meters, which is similar to the LiDAR's effective range for cone detection.
+   - Achieves 100% recall and precision with tightly fitting bounding boxes for cones within 25 m.
+   - Limitation: fails to consistently detect cones beyond 30 meters.
+
+2. **Cone_Detector_30m_Plus_YOLOv8.pt**
+   - Designed for cone detection beyond 30 meters, with high precision, recall, and mAP.
+   - Overfitting was observed due to redundancy in the training data, so it is less reliable at closer range.
+   - Serves as a strong baseline for long-range cone detection tasks.
+
+### Parking Space Detection – Lane-Based Segmentation Models
+
+Models:
+1. **parking_space_YOLO11n-seg.pt**
+2. **parking_space_YOLO11s-seg.pt**
+3. **parking_space_yolov8n-seg.pt**
+
+These models perform well on the original parking lot layout, accurately segmenting lanes and identifying available spaces. However, they do not generalize well to newly modified parking lots with different lane markings. To improve generalization, additional labeled images from the updated environment are required.
+
+> **Note:** While **YOLO11s-seg** currently shows the lowest performance among the three models, its larger architecture offers the most potential for improvement. With more diverse training data and fine-tuning, it could outperform both YOLOv8n-seg and YOLO11n-seg in accuracy and generalization.
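+
+As a quick sanity check, any of these checkpoints can be loaded directly with the `ultralytics` API. The snippet below is only an illustrative sketch: the sample image name is a placeholder and the confidence threshold is a suggested starting point, not a tuned value.
+
+```python
+from ultralytics import YOLO
+
+# Load one of the contributed segmentation checkpoints (path assumes the
+# model has been downloaded into GEMstack/knowledge/detection/).
+model = YOLO("GEMstack/knowledge/detection/parking_space_YOLO11n-seg.pt")
+
+# "parking_lot_sample.png" is a placeholder image; conf=0.25 is only a
+# suggested starting threshold.
+results = model("parking_lot_sample.png", conf=0.25)
+
+print(results[0].boxes.xywh)  # bounding boxes of detected spaces/lanes
+print(results[0].masks)       # segmentation masks (None if nothing detected)
+```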
+
+| Model       | mAP@50 | mAP@50–95 | Benefits & Tradeoffs                               |
+|-------------|--------|-----------|----------------------------------------------------|
+| Base model  | N/A    | N/A       | Placeholder for comparison; not evaluated          |
+| YOLOv8n-seg | 0.886  | 0.634     | Fast, modern YOLOv8 variant; efficient and compact |
+| YOLO11n-seg | 0.905  | 0.703     | Very fast and lightweight; great for real-time use |
+| YOLO11s-seg | 0.899  | 0.685     | Balanced option with good accuracy and speed       |

From f9ceb76d0a1a5c15eb663499dc13b01fe02526a2 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:34:40 -0500
Subject: [PATCH 2/5] Update cone_detection.py

---
 GEMstack/onboard/perception/cone_detection.py | 346 +++++++++---------
 1 file changed, 174 insertions(+), 172 deletions(-)

diff --git a/GEMstack/onboard/perception/cone_detection.py b/GEMstack/onboard/perception/cone_detection.py
index ab83d03d..69d1c0f7 100644
--- a/GEMstack/onboard/perception/cone_detection.py
+++ b/GEMstack/onboard/perception/cone_detection.py
@@ -1,8 +1,8 @@
-from ...state import AllState, VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \
+from ...state import VehicleState, ObjectPose, ObjectFrameEnum, Obstacle, ObstacleMaterialEnum, \
     ObstacleStateEnum
 from ..interface.gem import GEMInterface
 from ..component import Component
-from .perception_utils import *
+from GEMstack.onboard.perception.utils.perception_utils import *
 from ultralytics import YOLO
 import cv2
 from typing import Dict
@@ -13,20 +13,15 @@
 from sensor_msgs.msg import PointCloud2, Image
 from message_filters import Subscriber, ApproximateTimeSynchronizer
 from cv_bridge import CvBridge
-import time
 import os
 import yaml


 class ConeDetector3D(Component):
     """
-    Detects cones by fusing YOLO 2D detections with LiDAR point cloud data.
-
-    Tracking is optional: set `enable_tracking=False` to disable persistent tracking
-    and return only detections from the current frame.
-
-    Supports multiple cameras; each camera’s intrinsics and extrinsics are
-    loaded from a single YAML calibration file via plain PyYAML.
+    Detects traffic cones by fusing YOLO 2D detections with LiDAR point cloud data.
+    Tracking is optional: disable persistent tracking via `enable_tracking`.
+    Supports multiple cameras; each camera’s intrinsics/extrinsics are loaded via YAML.
""" def __init__( @@ -40,20 +35,37 @@ def __init__( save_data: bool = True, orientation: bool = True, use_start_frame: bool = True, + # Processing parameters with defaults + model_path: str = 'GEMstack/knowledge/detection/cone.pt', + downsample: bool = False, + conf_normal: float = 0.35, + conf_left: float = 0.15, + conf_right: float = 0.15, + max_depth: float = 40, + z_range: float = 0.08, + max_depth_diff: float = 0.5, + alpha: float = 0.1, + expiring_time: float = 20.0, + update_rate: int = 8, **kwargs ): - # Core interfaces and state + # --- Core interfaces --- self.vehicle_interface = vehicle_interface + # Containers for current and tracked obstacles self.current_obstacles = {} self.tracked_obstacles = {} - self.cone_counter = 0 + self.cone_counter = 0 # unique ID counter + self.update_rate = update_rate + # Latest sensor data placeholders self.latest_image = None self.latest_lidar = None self.bridge = CvBridge() + + # Start pose & time references self.start_pose_abs = None self.start_time = None - # Config flags + # --- Config flags from init args --- self.camera_name = camera_name self.enable_tracking = enable_tracking self.visualize_2d = visualize_2d @@ -61,51 +73,56 @@ def __init__( self.save_data = save_data self.orientation = orientation self.use_start_frame = use_start_frame - - # 2) Load camera intrinsics/extrinsics from the supplied YAML + # Processing parameters + self.downsample = downsample + self.conf_normal = conf_normal + self.conf_left = conf_left + self.conf_right = conf_right + self.max_depth = max_depth + self.z_range = z_range + self.max_depth_diff = max_depth_diff + self.alpha = alpha + self.expiring_time = expiring_time + self.model_path = model_path + # --- Load camera calibration (intrinsics/extrinsics) --- with open(camera_calib_file, 'r') as f: calib = yaml.safe_load(f) - - # Expect structure: - # cameras: - # front: - # K: [[...], [...], [...]] - # D: [...] 
- # T_l2c: [[...], ..., [...]] cam_cfg = calib['cameras'][camera_name] - self.K = np.array(cam_cfg['K']) - self.D = np.array(cam_cfg['D']) - self.T_l2c = np.array(cam_cfg['T_l2c']) - self.T_l2v = np.array(cam_cfg['T_l2v']) - - # Derived transforms + self.K = np.array(cam_cfg['K']) # Camera intrinsic matrix + self.D = np.array(cam_cfg['D']) # Distortion coefficients + self.T_l2c = np.array(cam_cfg['T_l2c']) # LiDAR-to-camera transform + self.T_l2v = np.array(cam_cfg['T_l2v']) # LiDAR-to-vehicle transform + # Placeholders for undistortion maps self.undistort_map1 = None self.undistort_map2 = None self.camera_front = (camera_name == 'front') def rate(self) -> float: - return 8 + # Desired update frequency (Hz) + return self.update_rate def state_inputs(self) -> list: + # Inputs required: vehicle state return ['vehicle'] def state_outputs(self) -> list: + # Outputs provided: detected obstacles return ['obstacles'] def initialize(self): - # --- Determine the correct RGB topic for this camera --- + # Determine the correct ROS topic for the camera rgb_topic_map = { 'front': '/oak/rgb/image_raw', 'front_right': '/camera_fr/arena_camera_node/image_raw', - # add additional camera mappings here if needed + 'front_left': '/camera_fl/arena_camera_node/image_raw', } rgb_topic = rgb_topic_map.get( self.camera_name, f'/{self.camera_name}/rgb/image_raw' ) - # Subscribe to the RGB and LiDAR streams + # Subscribe to synchronized RGB and LiDAR streams self.rgb_sub = Subscriber(rgb_topic, Image) self.lidar_sub = Subscriber('/ouster/points', PointCloud2) self.sync = ApproximateTimeSynchronizer([ @@ -113,119 +130,114 @@ def initialize(self): ], queue_size=500, slop=0.03) self.sync.registerCallback(self.synchronized_callback) - # Initialize the YOLO detector - self.detector = YOLO('GEMstack/knowledge/detection/cone.pt') + # Initialize the YOLO detector model on GPU + self.detector = YOLO(self.model_path) self.detector.to('cuda') + + # Precompute camera-to-lidar transform and camera origin self.T_c2l = np.linalg.inv(self.T_l2c) self.R_c2l = self.T_c2l[:3, :3] self.camera_origin_in_lidar = self.T_c2l[:3, 3] def synchronized_callback(self, image_msg, lidar_msg): - step1 = time.time() + # Callback to convert incoming ROS messages to numpy formats try: self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") except Exception as e: rospy.logerr("Failed to convert image: {}".format(e)) self.latest_image = None - step2 = time.time() + # Convert PointCloud2 to numpy array self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) - step3 = time.time() - # print('image callback: ', step2 - step1, 'lidar callback ', step3 - step2) def undistort_image(self, image, K, D): + # Remove lens distortion and rectify the image h, w = image.shape[:2] newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) if self.undistort_map1 is None or self.undistort_map2 is None: - self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap(K, D, R=None, - newCameraMatrix=newK, size=(w, h), - m1type=cv2.CV_32FC1) - - start = time.time() - undistorted = cv2.remap(image, self.undistort_map1, self.undistort_map2, interpolation=cv2.INTER_NEAREST) - end = time.time() - # print('--------undistort', end-start) + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap( + K, D, R=None, newCameraMatrix=newK, size=(w, h), m1type=cv2.CV_32FC1 + ) + undistorted = cv2.remap( + image, self.undistort_map1, self.undistort_map2, + interpolation=cv2.INTER_NEAREST + ) return undistorted, newK def update(self, vehicle: 
VehicleState) -> Dict[str, Obstacle]: - downsample = False - # Gate guards against data not being present for both sensors: + # Return empty if data not yet received from both sensors if self.latest_image is None or self.latest_lidar is None: return {} - latest_image = self.latest_image.copy() - # Set up current time variables - start = time.time() + latest_image = self.latest_image.copy() current_time = self.vehicle_interface.time() - if downsample: + # Optionally downsample point cloud + if self.downsample: lidar_down = downsample_points(self.latest_lidar, voxel_size=0.1) else: lidar_down = self.latest_lidar.copy() + # Initialize start time if self.start_time is None: self.start_time = current_time time_elapsed = current_time - self.start_time - # Ensure data/ exists and build timestamp + # Optionally save raw sensor data if self.save_data: self.save_sensor_data(vehicle=vehicle, latest_image=latest_image) - if self.camera_front == False: - start = time.time() + # Undistort if not using front camera + if not self.camera_front: undistorted_img, current_K = self.undistort_image(latest_image, self.K, self.D) - end = time.time() - # print('-------processing time undistort_image---', end -start) - self.current_K = current_K - orig_H, orig_W = undistorted_img.shape[:2] - - # --- Begin modifications for three-angle detection --- img_normal = undistorted_img else: img_normal = latest_image.copy() undistorted_img = latest_image.copy() - orig_H, orig_W = latest_image.shape[:2] - self.current_K = self.K - # print(self.K) - # print(self.T_l2c) - results_normal = self.detector(img_normal, conf=0.35, classes=[0]) + current_K = self.K + + # 2D detection on normal orientation + results_normal = self.detector(img_normal, conf=self.conf_normal, classes=[0]) + combined_boxes = [] if not self.enable_tracking: + # Reset ID counter if no tracking self.cone_counter = 0 + + # Optionally run detectors on rotated images for orientation if self.orientation: img_left = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_COUNTERCLOCKWISE) img_right = cv2.rotate(undistorted_img.copy(), cv2.ROTATE_90_CLOCKWISE) - results_left = self.detector(img_left, conf=0.15, classes=[0]) - results_right = self.detector(img_right, conf=0.15, classes=[0]) + results_left = self.detector(img_left, conf=self.conf_left, classes=[0]) + results_right = self.detector(img_right, conf=self.conf_right, classes=[0]) boxes_left = np.array(results_left[0].boxes.xywh.cpu()) if len(results_left) > 0 else [] boxes_right = np.array(results_right[0].boxes.xywh.cpu()) if len(results_right) > 0 else [] + # Transform rotated boxes back to normal image coords for box in boxes_left: cx, cy, w, h = box - new_cx = orig_W - 1 - cy + new_cx = undistorted_img.shape[1] - 1 - cy new_cy = cx - new_w = h # Swap width and height. - new_h = w - combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.RIGHT)) + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.RIGHT)) for box in boxes_right: cx, cy, w, h = box new_cx = cy - new_cy = orig_H - 1 - cx - new_w = h # Swap width and height. 
- new_h = w - combined_boxes.append((new_cx, new_cy, new_w, new_h, ObstacleStateEnum.LEFT)) + new_cy = undistorted_img.shape[0] - 1 - cx + combined_boxes.append((new_cx, new_cy, h, w, ObstacleStateEnum.LEFT)) + # Add normal orientation detections boxes_normal = np.array(results_normal[0].boxes.xywh.cpu()) if len(results_normal) > 0 else [] for box in boxes_normal: cx, cy, w, h = box combined_boxes.append((cx, cy, w, h, ObstacleStateEnum.STANDING)) - # Visualize the received images in 2D with their corresponding labels - # It draws rectangles and labels on the images: - if getattr(self, 'visualize_2d', False): + # 2D visualization if enabled + if self.visualize_2d: + # Draw bounding boxes and state labels on the image for (cx, cy, w, h, state) in combined_boxes: left = int(cx - w / 2) right = int(cx + w / 2) top = int(cy - h / 2) bottom = int(cy + h / 2) + color = (255, 255, 255) if state == ObstacleStateEnum.STANDING: color = (255, 0, 0) label = "STANDING" @@ -235,60 +247,53 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: elif state == ObstacleStateEnum.LEFT: color = (0, 0, 255) label = "LEFT" - else: - color = (255, 255, 255) - label = "UNKNOWN" cv2.rectangle(undistorted_img, (left, top), (right, bottom), color, 2) cv2.putText(undistorted_img, label, (left, max(top - 5, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2) cv2.imshow("Detection - Cone 2D", undistorted_img) - start = time.time() - # Transform the lidar points from lidar frame of reference to camera EXTRINSIC frame of reference. - # Then project the pixels onto the lidar points to "paint them" (essentially determine which points are associated with detected objects) + # Transform LiDAR points to camera frame and project to image pts_cam = transform_points_l2c(lidar_down, self.T_l2c) - projected_pts = project_points(pts_cam, self.current_K, lidar_down) - # What is returned: - # projected_pts[:, 0]: u-coordinate in the image (horizontal pixel position) - # projected_pts[:, 1]: v-coordinate in the image (vertical pixel position) - # projected_pts[:, 2:5]: original X, Y, Z coordinates in the LiDAR frame - - end = time.time() - # print('-------processing time1---', end -start) + projected_pts = project_points(pts_cam, current_K, lidar_down) obstacles = {} - - for i, box_info in enumerate(combined_boxes): + # Fuse 2D boxes with 3D points + for box_info in combined_boxes: cx, cy, w, h, state = box_info - # print(cx, cy, w, h) + # Define ROI in image coords left = int(cx - w / 1.6) right = int(cx + w / 1.6) top = int(cy - h / 2) bottom = int(cy + h / 2) - mask = (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & \ - (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + mask = ( + (projected_pts[:, 0] >= left) & (projected_pts[:, 0] <= right) & + (projected_pts[:, 1] >= top) & (projected_pts[:, 1] <= bottom) + ) roi_pts = projected_pts[mask] - # print(roi_pts) if roi_pts.shape[0] < 5: continue + # Extract 3D points and apply depth filters points_3d = roi_pts[:, 2:5] - - points_3d = filter_points_within_threshold(points_3d, 40) - points_3d = remove_ground_by_min_range(points_3d, z_range=0.08) - points_3d = filter_depth_points(points_3d, max_depth_diff=0.5) + points_3d = filter_points_within_threshold(points_3d, self.max_depth) + points_3d = remove_ground_by_min_range(points_3d, z_range=self.z_range) + points_3d = filter_depth_points(points_3d, max_depth_diff=self.max_depth_diff) if self.use_cyl_roi: + # Optionally refine with cylindrical ROI global_filtered = 
filter_points_within_threshold(lidar_down, 30) - roi_cyl = cylindrical_roi(global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2) + roi_cyl = cylindrical_roi( + global_filtered, np.mean(points_3d, axis=0), radius=0.4, height=1.2 + ) refined_cluster = remove_ground_by_min_range(roi_cyl, z_range=0.01) refined_cluster = filter_depth_points(refined_cluster, max_depth_diff=0.3) else: refined_cluster = points_3d.copy() - # end1 = time.time() + if refined_cluster.shape[0] < 4: continue + # Compute oriented bounding box of the cluster pcd = o3d.geometry.PointCloud() pcd.points = o3d.utility.Vector3dVector(refined_cluster) obb = pcd.get_oriented_bounding_box() @@ -296,14 +301,17 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: dims = tuple(obb.extent) R_lidar = obb.R.copy() + # Transform center to vehicle frame refined_center_hom = np.append(refined_center, 1) refined_center_vehicle_hom = self.T_l2v @ refined_center_hom refined_center_vehicle = refined_center_vehicle_hom[:3] + # Compute orientation in vehicle frame R_vehicle = self.T_l2v[:3, :3] @ R_lidar yaw, pitch, roll = R.from_matrix(R_vehicle).as_euler('zyx', degrees=False) refined_center = refined_center_vehicle + # Optionally convert to start frame if self.use_start_frame: if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose @@ -312,8 +320,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: vehicle.pose, self.start_pose_abs ) - T_vehicle_to_start = vehicle_start_pose.transform() - xp, yp, zp = (T_vehicle_to_start @ np.append(refined_center, 1))[:3] + xp, yp, zp = (vehicle_start_pose.transform() @ np.append(refined_center, 1))[:3] out_frame = ObjectFrameEnum.START else: xp, yp, zp = refined_center @@ -326,7 +333,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: frame=out_frame ) - # --- Optional tracking --- + # Tracking: match to existing cone or add new if self.enable_tracking: existing_id = match_existing_cone( np.array([new_pose.x, new_pose.y, new_pose.z]), @@ -335,39 +342,38 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: distance_threshold=2 ) if existing_id is not None: + # Smooth pose using exponential moving average old_state = self.tracked_obstacles[existing_id] - if vehicle.v < 100: - alpha = 0.1 - avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x - avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y - avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z - avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw - avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch - avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll - - updated_pose = ObjectPose( - t=new_pose.t, - x=avg_x, - y=avg_y, - z=avg_z, - yaw=avg_yaw, - pitch=avg_pitch, - roll=avg_roll, - frame=new_pose.frame - ) - updated_obstacle = Obstacle( - pose=updated_pose, - dimensions=dims, - outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, - state=state, - collidable=True - ) - else: - updated_obstacle = old_state + alpha = self.alpha + avg_x = alpha * new_pose.x + (1 - alpha) * old_state.pose.x + avg_y = alpha * new_pose.y + (1 - alpha) * old_state.pose.y + avg_z = alpha * new_pose.z + (1 - alpha) * old_state.pose.z + avg_yaw = alpha * new_pose.yaw + (1 - alpha) * old_state.pose.yaw + avg_pitch = alpha * new_pose.pitch + (1 - alpha) * old_state.pose.pitch + avg_roll = alpha * new_pose.roll + (1 - alpha) * old_state.pose.roll + + updated_pose = ObjectPose( + t=new_pose.t, + x=avg_x, + y=avg_y, + 
z=avg_z, + yaw=avg_yaw, + pitch=avg_pitch, + roll=avg_roll, + frame=new_pose.frame + ) + updated_obstacle = Obstacle( + pose=updated_pose, + dimensions=dims, + outline=None, + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=state, + collidable=True + ) obstacles[existing_id] = updated_obstacle self.tracked_obstacles[existing_id] = updated_obstacle else: + # Create new tracked obstacle obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 new_obstacle = Obstacle( @@ -381,6 +387,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: obstacles[obstacle_id] = new_obstacle self.tracked_obstacles[obstacle_id] = new_obstacle else: + # No tracking: return only current frame detections obstacle_id = f"Cone{self.cone_counter}" self.cone_counter += 1 new_obstacle = Obstacle( @@ -389,15 +396,20 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: outline=None, material=ObstacleMaterialEnum.TRAFFIC_CONE, state=state, - collidable = True + collidable=True ) obstacles[obstacle_id] = new_obstacle - self.current_obstacles = obstacles + # Clean up stale tracked cones + stale_ids = [oid for oid, obs in self.tracked_obstacles.items() + if current_time - obs.pose.t > self.expiring_time] + for oid in stale_ids: + rospy.loginfo(f"Removing stale obstacle: {oid}\n") + del self.tracked_obstacles[oid] - # If tracking not enabled, return only current frame detections - if not self.enable_tracking: - for obstacle_id, obstacle in self.current_obstacles.items(): + # Log and return tracked obstacles if tracking enabled + if self.enable_tracking: + for obstacle_id, obstacle in self.tracked_obstacles.items(): p = obstacle.pose rospy.loginfo( f"Cone ID: {obstacle_id}\n" @@ -405,17 +417,10 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" f"state:{obstacle.state}" ) - end = time.time() - # print('-------processing time', end -start) - return self.current_obstacles - - stale_ids = [obstacle_id for obstacle_id, obstacle in self.tracked_obstacles.items() - if current_time - obstacle.pose.t > 20.0] - for obstacle_id in stale_ids: - rospy.loginfo(f"Removing stale obstacle: {obstacle_id}\n") - del self.tracked_obstacles[obstacle_id] - if self.enable_tracking: - for obstacle_id, obstacle in self.tracked_obstacles.items(): + return self.tracked_obstacles + else: + # Log and return detections for current frame only + for obstacle_id, obstacle in obstacles.items(): p = obstacle.pose rospy.loginfo( f"Cone ID: {obstacle_id}\n" @@ -423,18 +428,14 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" f"state:{obstacle.state}" ) - end = time.time() - # print('-------processing time', end -start) - return self.tracked_obstacles + return obstacles def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + # Save raw image, LiDAR, and vehicle state to disk for debugging os.makedirs("data", exist_ok=True) tstamp = int(self.vehicle_interface.time() * 1000) - # 1) Dump raw image cv2.imwrite(f"data/{tstamp}_image.png", latest_image) - # 2) Dump raw LiDAR np.savez(f"data/{tstamp}_lidar.npz", lidar=self.latest_lidar) - # 3) Write BEFORE_TRANSFORM with open(f"data/{tstamp}_vehstate.txt", "w") as f: vp = vehicle.pose f.write( @@ -442,7 +443,6 @@ def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" ) - # 
Compute vehicle_start_pose in either START or CURRENT if self.use_start_frame: if self.start_pose_abs is None: self.start_pose_abs = vehicle.pose @@ -458,21 +458,21 @@ def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: with open(f"data/{tstamp}_vehstate.txt", "a") as f: f.write( f"AFTER_TRANSFORM " - f"x={vehicle_start_pose.x:.3f}, " - f"y={vehicle_start_pose.y:.3f}, " - f"z={vehicle_start_pose.z:.3f}, " - f"yaw={vehicle_start_pose.yaw:.2f}, " - f"pitch={vehicle_start_pose.pitch:.2f}, " - f"roll={vehicle_start_pose.roll:.2f}, " - f"frame={mode}\n" + f"x={vehicle_start_pose.x:.3f}, y={vehicle_start_pose.y:.3f}, z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, frame={mode}\n" ) - # ----- Fake Cone Detector 2D (for Testing Purposes) ----- +# ----- Fake Cone Detector 2D (for Testing Purposes) ----- class FakConeDetector(Component): + """ + Simulates cone detections at preset time intervals for testing pipeline. + """ def __init__(self, vehicle_interface: GEMInterface): self.vehicle_interface = vehicle_interface + # Simulated detection windows (start, end) self.times = [(5.0, 20.0), (30.0, 35.0)] self.t_start = None @@ -491,7 +491,7 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: t = self.vehicle_interface.time() - self.t_start res = {} for time_range in self.times: - if t >= time_range[0] and t <= time_range[1]: + if time_range[0] <= t <= time_range[1]: res['cone0'] = box_to_fake_obstacle((0, 0, 0, 0)) rospy.loginfo("Detected a Cone (simulated)") return res @@ -499,11 +499,13 @@ def update(self, vehicle: VehicleState) -> Dict[str, Obstacle]: def box_to_fake_obstacle(box): x, y, w, h = box - pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) + pose = ObjectPose(t=0, x=x + w / 2, y=y + h / 2, z=0, + yaw=0, pitch=0, roll=0, frame=ObjectFrameEnum.CURRENT) dims = (w, h, 0) return Obstacle(pose=pose, dimensions=dims, outline=None, - material=ObstacleMaterialEnum.TRAFFIC_CONE, state=ObstacleStateEnum.STANDING, collidable=True) + material=ObstacleMaterialEnum.TRAFFIC_CONE, + state=ObstacleStateEnum.STANDING, collidable=True) if __name__ == '__main__': - pass \ No newline at end of file + pass From 63eab6eda74d4b5062750ec466ca5fe4ba0b7c7a Mon Sep 17 00:00:00 2001 From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com> Date: Fri, 16 May 2025 09:35:24 -0500 Subject: [PATCH 3/5] Update cone_detection.yaml --- launch/cone_detection.yaml | 51 ++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/launch/cone_detection.yaml b/launch/cone_detection.yaml index 6b2d0a82..c0159f22 100644 --- a/launch/cone_detection.yaml +++ b/launch/cone_detection.yaml @@ -15,14 +15,27 @@ drive: obstacle_detection : type: cone_detection.ConeDetector3D args: - camera_name: front #[front, front_right] - camera_calib_file: ./GEMstack/knowledge/calibration/cameras.yaml - enable_tracking: True # True if you want to enable tracking - visualize_2d: False # True to see 2D detection visualization - use_cyl_roi: False # True to use a cylinder ROI - save_data: False # True to save sensor input data - orientation: True # True to detect flipped cones - use_start_frame: True # True to output in START frame + camera_name: front # [front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/gem_e4_perception_cameras.yaml + enable_tracking: True # Enable persistent cone tracking + 
visualize_2d: False # Display 2D bounding‐box overlays if True + save_data: False # Save raw images & LiDAR for offline debugging + orientation: True # Also detect cones when image is rotated + use_start_frame: True # Report cone poses in the START frame + # If you want to change a detection model, edit the path here + model_path: ./GEMstack/knowledge/detection/cone.pt + # In most cases you don’t need to change the settings below + # they’ve been fine-tuned based on data from the vertical teams. + use_cyl_roi: False # Apply an additional cylindrical ROI filter. Useful when calibration is not reliable + downsample: False # Voxel-downsample LiDAR points for faster processing + conf_normal: 0.35 # YOLO confidence threshold for upright cones + conf_left: 0.10 # YOLO confidence threshold for left-rotated cones + conf_right: 0.10 # YOLO confidence threshold for right-rotated cones + max_depth: 40 # Maximum LiDAR range (meters) to include in ROI + z_range: 0.08 # Minimum height above ground to remove ground plane + max_depth_diff: 0.50 # Maximum local depth variance to cone pointcloud cluster + alpha: 0.10 # EMA smoothing weight for updating tracked poses + expiring_time: 20 # Time (seconds) after which a tracked cone is removed perception_normalization : StandardPerceptionNormalizer planning: @@ -30,20 +43,20 @@ drive: log: # Specify the top-level folder to save the log files. Default is 'logs' - #folder : 'logs' + #folder : 'logs' # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix - #prefix : 'fixed_route_' + #prefix : 'fixed_route_' # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix - #suffix : 'test3' + #suffix : 'test3' # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. - ros_topics : + ros_topics : # Specify options to pass to rosbag record. Default is no options. - #rosbag_options : '--split --size=1024' + #rosbag_options : '--split --size=1024' # If True, then record all readings / commands of the vehicle interface. Default False vehicle_interface : True # Specify which components to record to behavior.json. Default records nothing components : ['state_estimation','obstacle_detection','motion_planning'] - # Specify which components of state to record to state.json. Default records nothing + # Specify which components of state to record to state.json. Default records nothing #state: ['all'] # Specify the rate in Hz at which to record state to state.json. 
Default records at the pipeline's rate
  #state_rate: 10
@@ -51,7 +64,7 @@ log:
   #auto_plot : True
 replay: # Add items here to set certain topics / inputs to be replayed from logs
   # Specify which log folder to replay from
-  log:
+  log:
   ros_topics : []
   components : []
@@ -76,11 +89,11 @@ variants:
       mission_execution: StandardExecutor
       require_engaged: False
       visualization: !include "klampt_visualization.yaml"
-      drive:
+      drive:
        perception:
          state_estimation : OmniscientStateEstimator
        planning:
-         relations_estimation:
+         relations_estimation:
           type: pedestrian_yield_logic.PedestrianYielder
           args:
             mode: 'sim'
@@ -101,13 +114,13 @@ variants:
       args:
         scene: !relative_path '../scenes/xyhead_demo.yaml'
       visualization: !include "klampt_visualization.yaml"
-      drive:
+      drive:
        perception:
          # agent_detection : pedestrian_detection.FakePedestrianDetector2D
          agent_detection : OmniscientAgentDetector #this option reads agents from the simulator
          state_estimation : OmniscientStateEstimator
        planning:
-         relations_estimation:
+         relations_estimation:
           type: pedestrian_yield_logic.PedestrianYielder
           args:
             mode: 'sim'

From b9a69f00e7f47c2c71309f1f61237b910dc402f6 Mon Sep 17 00:00:00 2001
From: Tianyu Ren <142737618+Ren990420@users.noreply.github.com>
Date: Fri, 16 May 2025 09:36:36 -0500
Subject: [PATCH 4/5] Create pedestrian_detection.py

---
 .../perception/pedestrian_detection.py       | 390 ++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100644 GEMstack/onboard/perception/pedestrian_detection.py

diff --git a/GEMstack/onboard/perception/pedestrian_detection.py b/GEMstack/onboard/perception/pedestrian_detection.py
new file mode 100644
index 00000000..cbad22e3
--- /dev/null
+++ b/GEMstack/onboard/perception/pedestrian_detection.py
@@ -0,0 +1,390 @@
+from ...state import VehicleState, ObjectPose, ObjectFrameEnum, AgentState, AgentEnum, AgentActivityEnum
+from ..interface.gem import GEMInterface
+from ..component import Component
+from GEMstack.onboard.perception.utils.perception_utils import * # If you want to alias functions for clarity, do so in perception_utils
+from ultralytics import YOLO
+import cv2
+from typing import Dict
+import open3d as o3d
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+import rospy
+from sensor_msgs.msg import PointCloud2, Image
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+from cv_bridge import CvBridge
+import os
+import yaml
+
+
+class PedestrianDetector3D(Component):
+    """
+    Detects pedestrians by fusing YOLO 2D detections with LiDAR point cloud data.
+    Tracking is optional: `enable_tracking=False` returns only current-frame detections.
+    Calculates and outputs each pedestrian's velocity and yaw rate.
+ """ + + def __init__( + self, + vehicle_interface: GEMInterface, + camera_name: str, + camera_calib_file: str, + enable_tracking: bool = True, + visualize_2d: bool = False, + use_cyl_roi: bool = False, + save_data: bool = True, + use_start_frame: bool = True, + **kwargs + ): + # Core interfaces and state + self.vehicle_interface = vehicle_interface + self.current_agents = {} + self.tracked_agents = {} + self.pedestrian_counter = 0 + self.latest_image = None + self.latest_lidar = None + self.bridge = CvBridge() + self.start_pose_abs = None + self.start_time = None + + # Configuration + self.camera_name = camera_name + self.enable_tracking = enable_tracking + self.visualize_2d = visualize_2d + self.use_cyl_roi = use_cyl_roi + self.save_data = save_data + self.use_start_frame = use_start_frame + + # 2) Load camera intrinsics/extrinsics from YAML + with open(camera_calib_file, 'r') as f: + calib = yaml.safe_load(f) + cam_cfg = calib['cameras'][camera_name] + self.K = np.array(cam_cfg['K']) + self.D = np.array(cam_cfg['D']) + self.T_l2c = np.array(cam_cfg['T_l2c']) + self.T_l2v = np.array(cam_cfg['T_l2v']) # LiDAR-to-vehicle transform + + self.undistort_map1 = None + self.undistort_map2 = None + self.camera_front = (camera_name == 'front') + + def rate(self) -> float: + return 8.0 + + def state_inputs(self) -> list: + return ['vehicle'] + + def state_outputs(self) -> list: + return ['agents'] + + def initialize(self): + # Subscribe to RGB & LiDAR streams + rgb_topic_map = { + 'front': '/oak/rgb/image_raw', + 'front_right': '/camera_fr/arena_camera_node/image_raw', + } + rgb_topic = rgb_topic_map.get(self.camera_name, + f'/{self.camera_name}/rgb/image_raw') + self.rgb_sub = Subscriber(rgb_topic, Image) + self.lidar_sub = Subscriber('/ouster/points', PointCloud2) + self.sync = ApproximateTimeSynchronizer( + [self.rgb_sub, self.lidar_sub], queue_size=500, slop=0.025 + ) + self.sync.registerCallback(self.synchronized_callback) + + # Initialize YOLO pedestrian detection model (COCO class 0) + self.detector = YOLO('GEMstack/knowledge/detection/yolov8n.pt') + self.detector.to('cuda') + + # Compute camera-to-LiDAR transform + self.T_c2l = np.linalg.inv(self.T_l2c) + self.R_c2l = self.T_c2l[:3, :3] + self.camera_origin_in_lidar = self.T_c2l[:3, 3] + + def synchronized_callback(self, image_msg, lidar_msg): + try: + self.latest_image = self.bridge.imgmsg_to_cv2(image_msg, "bgr8") + except Exception as e: + rospy.logerr(f"Failed to convert image: {e}") + self.latest_image = None + self.latest_lidar = pc2_to_numpy(lidar_msg, want_rgb=False) + + def undistort_image(self, image, K, D): + h, w = image.shape[:2] + newK, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h)) + if self.undistort_map1 is None: + self.undistort_map1, self.undistort_map2 = cv2.initUndistortRectifyMap( + K, D, None, newK, (w, h), cv2.CV_32FC1 + ) + undistorted = cv2.remap( + image, self.undistort_map1, self.undistort_map2, + interpolation=cv2.INTER_NEAREST + ) + return undistorted, newK + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + # Ensure both image and LiDAR data are available + if self.latest_image is None or self.latest_lidar is None: + return {} + + current_time = self.vehicle_interface.time() + if self.start_time is None: + self.start_time = current_time + + # Optionally save raw sensor data + if self.save_data: + self.save_sensor_data(vehicle, self.latest_image) + + # Undistort image if needed + if not self.camera_front: + img, self.current_K = self.undistort_image( + self.latest_image, self.K, 
self.D) + else: + img = self.latest_image.copy() + self.current_K = self.K + + # Perform 2D detection + results = self.detector(img, conf=0.35, classes=[0]) + boxes2d = (np.array(results[0].boxes.xywh.cpu()) + if len(results) > 0 else []) + + # Project LiDAR points into image plane + lidar_pts = self.latest_lidar.copy() + pts_cam = transform_points_l2c(lidar_pts, self.T_l2c) + projected = project_points(pts_cam, self.current_K, lidar_pts) + + agents: Dict[str, AgentState] = {} + for (cx, cy, w, h) in boxes2d: + # print(cx, cy, w, h) + # Define ROI in image for each detection + left = int(cx - w / 2) + right = int(cx + w / 2) + top = int(cy - h / 2) + bottom = int(cy + h / 2) + mask = ((projected[:, 0] >= left) & (projected[:, 0] <= right) & + (projected[:, 1] >= top) & (projected[:, 1] <= bottom)) + roi = projected[mask] + if roi.shape[0] < 5: + continue + + # Filter point cloud + pts3d = roi[:, 2:5] + pts3d = filter_points_within_threshold(pts3d, 40) + pts3d = remove_ground_by_min_range(pts3d, z_range=0.08) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.5) + + if self.use_cyl_roi: + glob = filter_points_within_threshold(lidar_pts, 30) + cyl = cylindrical_roi(glob, np.mean(pts3d, axis=0), radius=0.5, height=2.0) + pts3d = remove_ground_by_min_range(cyl, z_range=0.01) + pts3d = filter_depth_points(pts3d, max_depth_diff=0.3) + + if pts3d.shape[0] < 4: + continue + + # Fit oriented bounding box + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(pts3d) + obb = pcd.get_oriented_bounding_box() + center = obb.center + dims = tuple(obb.extent) + R_lidar = obb.R + + # Transform to vehicle frame + c_hom = np.append(center, 1) + veh_hom = self.T_l2v @ c_hom + veh_center = veh_hom[:3] + + # Compute orientation + R_veh = self.T_l2v[:3, :3] @ R_lidar + yaw, pitch, roll = R.from_matrix(R_veh).as_euler('zyx', degrees=False) + + # Convert to START or CURRENT frame + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, vehicle.pose, self.start_pose_abs) + T_vs = start_pose.transform() + xp, yp, zp = (T_vs @ np.append(veh_center, 1))[:3] + frame = ObjectFrameEnum.START + else: + xp, yp, zp = veh_center + frame = ObjectFrameEnum.CURRENT + + new_pose = ObjectPose( + t=current_time, + x=xp, y=yp, z=zp, + yaw=yaw, pitch=pitch, roll=roll, + frame=frame + ) + + # Matching and tracking + if self.enable_tracking: + existing = match_existing_cone( + np.array([new_pose.x, new_pose.y, new_pose.z]), + dims, self.tracked_agents, + distance_threshold=1.0 + ) + if existing is not None: + old = self.tracked_agents[existing] + dt = max(new_pose.t - old.pose.t, 1e-3) + # Instantaneous velocity + vx = (new_pose.x - old.pose.x) / dt + vy = (new_pose.y - old.pose.y) / dt + vz = (new_pose.z - old.pose.z) / dt + # Yaw rate + yaw_rate = (new_pose.yaw - old.pose.yaw) / dt + speed = np.linalg.norm([vx, vy, vz]) + activity = (AgentActivityEnum.MOVING + if speed > 0.1 else AgentActivityEnum.STOPPED) + + updated = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=activity, + velocity=(vx, vy, vz), + yaw_rate=yaw_rate + ) + agents[existing] = updated + self.tracked_agents[existing] = updated + + else: + aid = f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + new_agent = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + 
yaw_rate=0 + ) + agents[aid] = new_agent + self.tracked_agents[aid] = new_agent + for agent_id, agent in self.current_agents.items(): + p = agent.pose + rospy.loginfo( + f"Agent ID: {agent_id}\n" + f"Pose: (x: {p.x:.3f}, y: {p.y:.3f}, z: {p.z:.3f}, " + f"yaw: {p.yaw:.3f}, pitch: {p.pitch:.3f}, roll: {p.roll:.3f})\n" + f"Velocity: (vx: {agent.velocity[0]:.3f}, vy: {agent.velocity[1]:.3f}, vz: {agent.velocity[2]:.3f})\n" + f"type:{agent.activity}" + ) + else: + aid = f"Pedestrian{self.pedestrian_counter}" + self.pedestrian_counter += 1 + agents[aid] = AgentState( + pose=new_pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.STOPPED, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + self.current_agents = agents + + # Remove stale tracked agents + if self.enable_tracking: + stale = [ + aid for aid, a in self.tracked_agents.items() + if current_time - a.pose.t > 10.0 + ] + for aid in stale: + del self.tracked_agents[aid] + return self.tracked_agents + + return self.current_agents + + def save_sensor_data(self, vehicle: VehicleState, latest_image) -> None: + os.makedirs("data", exist_ok=True) + t = int(self.vehicle_interface.time() * 1000) + cv2.imwrite(f"data/{t}_image.png", latest_image) + np.savez(f"data/{t}_lidar.npz", lidar=self.latest_lidar) + # Write BEFORE_TRANSFORM state + with open(f"data/{t}_vehstate.txt", "w") as f: + vp = vehicle.pose + f.write( + f"BEFORE_TRANSFORM " + f"x={vp.x:.3f}, y={vp.y:.3f}, z={vp.z:.3f}, " + f"yaw={vp.yaw:.2f}, pitch={vp.pitch:.2f}, roll={vp.roll:.2f}\n" + ) + # Compute start or current frame state + if self.use_start_frame: + if self.start_pose_abs is None: + self.start_pose_abs = vehicle.pose + vehicle_start_pose = vehicle.pose.to_frame( + ObjectFrameEnum.START, + vehicle.pose, + self.start_pose_abs + ) + mode = "START" + else: + vehicle_start_pose = vehicle.pose + mode = "CURRENT" + with open(f"data/{t}_vehstate.txt", "a") as f: + f.write( + f"AFTER_TRANSFORM " + f"x={vehicle_start_pose.x:.3f}, " + f"y={vehicle_start_pose.y:.3f}, " + f"z={vehicle_start_pose.z:.3f}, " + f"yaw={vehicle_start_pose.yaw:.2f}, " + f"pitch={vehicle_start_pose.pitch:.2f}, " + f"roll={vehicle_start_pose.roll:.2f}, " + f"frame={mode}\n" + ) + + +# ----- Fake Cone Detector 2D (for Testing Purposes) ----- +class FakConeDetector(Component): + def __init__(self, vehicle_interface: GEMInterface): + self.vehicle_interface = vehicle_interface + self.times = [(5.0, 20.0), (30.0, 35.0)] + self.t_start = None + + def rate(self): + return 4.0 + + def state_inputs(self): + return ['vehicle'] + + def state_outputs(self): + return ['agents'] + + def update(self, vehicle: VehicleState) -> Dict[str, AgentState]: + if self.t_start is None: + self.t_start = self.vehicle_interface.time() + t = self.vehicle_interface.time() - self.t_start + res = {} + for time_range in self.times: + if time_range[0] <= t <= time_range[1]: + res['Pedestrian0'] = box_to_fake_agent((0, 0, 0, 0)) + rospy.loginfo("Detected a Pedestrian (simulated)") + return res + + +def box_to_fake_agent(box): + x, y, w, h = box + pose = ObjectPose( + t=0, x=x + w / 2, y=y + h / 2, z=0, + yaw=0, pitch=0, roll=0, + frame=ObjectFrameEnum.CURRENT + ) + dims = (w, h, 0) + return AgentState( + pose=pose, + dimensions=dims, + outline=None, + type=AgentEnum.PEDESTRIAN, + activity=AgentActivityEnum.MOVING, + velocity=(0, 0, 0), + yaw_rate=0 + ) + + +if __name__ == '__main__': + pass From c0bf22f594c2e160ea0bd53e5b27b481f407b9fb Mon Sep 17 00:00:00 2001 From: Tianyu Ren 
<142737618+Ren990420@users.noreply.github.com> Date: Fri, 16 May 2025 09:37:16 -0500 Subject: [PATCH 5/5] Create pedestrian_detection.yaml --- launch/pedestrian_detection.yaml | 132 +++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 launch/pedestrian_detection.yaml diff --git a/launch/pedestrian_detection.yaml b/launch/pedestrian_detection.yaml new file mode 100644 index 00000000..f491a6e9 --- /dev/null +++ b/launch/pedestrian_detection.yaml @@ -0,0 +1,132 @@ +description: "Run the yielding trajectory planner on the real vehicle with real perception" +mode: hardware +vehicle_interface: gem_hardware.GEMHardwareInterface +mission_execution: StandardExecutor + +# Recovery behavior after a component failure +recovery: + planning: + trajectory_tracking : recovery.StopTrajectoryTracker + +# Driving behavior for the GEM vehicle. Runs real pedestrian perception, yield planner, but does not send commands to real vehicle. +drive: + + perception: + state_estimation: GNSSStateEstimator + agent_detection: + type: pedestrian_detection.PedestrianDetector3D + args: + camera_name: front #[front, front_right] + camera_calib_file: ./GEMstack/knowledge/calibration/gem_e4_perception_cameras.yaml + + enable_tracking: True # True if you want to enable tracking + visualize_2d: False # True to see 2D detection visualization + use_cyl_roi: False # True to use a cylinder ROI + save_data: False # True to save sensor input data + orientation: False # True to detect flipped cones + use_start_frame: True # True to output in START frame + + perception_normalization: StandardPerceptionNormalizer + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'real' + params: { + 'yielder': 'expert', # 'expert', 'analytic', or 'simulation' + 'planner': 'milestone', # 'milestone', 'dt', or 'dx' + 'desired_speed': 1.0, # m/s, 1.5 m/s seems max speed? Feb24 + 'acceleration': 0.75 # m/s2, 0.5 is not enough to start moving. Feb24 + } + route_planning: + type: StaticRoutePlanner + args: [!relative_path '../GEMstack/knowledge/routes/forward_15m.csv','start'] + motion_planning: longitudinal_planning.YieldTrajectoryPlanner + trajectory_tracking: + type: pure_pursuit.PurePursuitTrajectoryTracker + print: False + + +log: + # Specify the top-level folder to save the log files. Default is 'logs' + #folder : 'logs' + # If prefix is specified, then the log folder will be named with the prefix followed by the date and time. Default no prefix + #prefix : 'fixed_route_' + # If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix + #suffix : 'test3' + # Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics. + ros_topics : + # Specify options to pass to rosbag record. Default is no options. + #rosbag_options : '--split --size=1024' + # If True, then record all readings / commands of the vehicle interface. Default False + vehicle_interface : True + # Specify which components to record to behavior.json. Default records nothing + components : ['state_estimation','agent_detection','motion_planning'] + # Specify which components of state to record to state.json. Default records nothing + #state: ['all'] + # Specify the rate in Hz at which to record state to state.json. Default records at the pipeline's rate + #state_rate: 10 + # If True, then make plots of the recorded data specifically from PurePursuitTrajectoryTracker_debug.csv and report metrics. 
Default False + #auto_plot : True +replay: # Add items here to set certain topics / inputs to be replayed from logs + # Specify which log folder to replay from + log: + ros_topics : [] + components : [] + +#usually can keep this constant +computation_graph: !include "../GEMstack/knowledge/defaults/computation_graph.yaml" + +variants: + detector_only: + run: + description: "Run the pedestrian detection code" + drive: + planning: + trajectory_tracking: + real_sim: + run: + description: "Run the pedestrian detection code with real detection and fake simulation" + mode: hardware + vehicle_interface: + type: gem_mixed.GEMRealSensorsWithSimMotionInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + mission_execution: StandardExecutor + require_engaged: False + visualization: !include "klampt_visualization.yaml" + drive: + perception: + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim' + + fake_real: + run: + description: "Run the yielding trajectory planner on the real vehicle with faked perception" + drive: + perception: + agent_detection : pedestrian_detection.FakePedestrianDetector2D + + fake_sim: + run: + description: "Run the yielding trajectory planner in simulation with faked perception" + mode: simulation + vehicle_interface: + type: gem_simulator.GEMDoubleIntegratorSimulationInterface + args: + scene: !relative_path '../scenes/xyhead_demo.yaml' + visualization: !include "klampt_visualization.yaml" + drive: + perception: + # agent_detection : pedestrian_detection.FakePedestrianDetector2D + agent_detection : OmniscientAgentDetector #this option reads agents from the simulator + state_estimation : OmniscientStateEstimator + planning: + relations_estimation: + type: pedestrian_yield_logic.PedestrianYielder + args: + mode: 'sim'