Merged
10 changes: 9 additions & 1 deletion .gitignore
@@ -166,6 +166,14 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# ZED run files
**/*.run
.vscode/
setup/zed_sdk.run
cuda/
homework/yolov8n.pt
homework/yolo11n.pt
GEMstack/knowledge/detection/yolov8n.pt
GEMstack/knowledge/detection/yolo11n.pt
yolov8n.pt
yolo11n.pt
204 changes: 204 additions & 0 deletions GEMstack/onboard/perception/pedestrian_detection.py
@@ -0,0 +1,204 @@
from ...state import AllState,VehicleState,ObjectPose,ObjectFrameEnum,AgentState,AgentEnum,AgentActivityEnum
from ..interface.gem import GEMInterface
from ..component import Component
from ultralytics import YOLO
import cv2
from typing import Dict
from cv_bridge import CvBridge
from sensor_msgs.msg import Image
import rospy
import os


def box_to_fake_agent(box):
"""Creates a fake agent state from an (x,y,w,h) bounding box.

    The location and size are only rough placeholders, since a 2D bounding box gives an image-space location rather than a true 3D pose.
"""
x,y,w,h = box
pose = ObjectPose(t=0,x=x+w/2,y=y+h/2,z=0,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT)
dims = (w,h,0)
return AgentState(pose=pose,dimensions=dims,outline=None,type=AgentEnum.PEDESTRIAN,activity=AgentActivityEnum.MOVING,velocity=(0,0,0),yaw_rate=0)
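# Hypothetical usage sketch (not part of this PR): a 100x200 box with its
# top-left corner at (50,80) maps to an agent centered at (100.0,180.0)
# in the CURRENT frame:
#   agent = box_to_fake_agent((50,80,100,200))
#   assert (agent.pose.x, agent.pose.y) == (100.0, 180.0)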


class PedestrianDetector2D(Component):
"""Detects pedestrians.

    self.vehicle_interface -> GEM car interface
    self.detector -> pedestrian detection model
    self.last_person_boxes -> boxes detected in the most recent frame
    self.pedestrians -> dict mapping track id to the AgentState of each pedestrian seen so far (may be removed if not required)
    self.visualization -> enable to publish an image with detections drawn
    self.confidence -> minimum model confidence threshold
    self.classes_to_detect -> class ids for the model to detect (0 = person)

"""
    def __init__(self, vehicle_interface : GEMInterface):
        self.vehicle_interface = vehicle_interface
        self.detector = YOLO(os.path.join(os.getcwd(), 'GEMstack/knowledge/detection/yolov8n.pt'))  # TODO: read the model path from a command-line argument
        self.last_person_boxes = []
        self.pedestrians = {}
        self.visualization = True  # Set to True to publish detection visualizations; TODO: read from a command-line argument
        self.confidence = 0.7
        self.classes_to_detect = 0

def rate(self):
return 4.0

def state_inputs(self):
return ['vehicle']

def state_outputs(self):
return ['agents']

def initialize(self):

"""Initializes subscribers and publishers

self.vehicle_interface.subscribe_sensor -> GEM Car camera subscription
self.rosbag_test -> ROS Bag topic subscription
self.pub_image -> Publishes Image with detection for visualization

"""
        # Tell the vehicle to call image_callback whenever 'front_camera' gets a reading; it expects images of type cv2.Mat

        # GEM car subscriber
        # self.vehicle_interface.subscribe_sensor('front_camera',self.image_callback,cv2.Mat)

        # Webcam
        # rospy.Subscriber('/webcam', Image, self.image_callback)

        # Testing with rosbag; queue_size=1 keeps only the most recent frame so detection never lags behind the camera
        self.rosbag_test = rospy.Subscriber('/oak/rgb/image_raw', Image, self.image_callback, queue_size=1)
        if self.visualization:
            self.pub_image = rospy.Publisher("/camera/image_detection", Image, queue_size=1)

# Use cv2.Mat for GEM Car, Image for RosBag
def image_callback(self, image : Image): #image : cv2.Mat):

"""Detects pedestrians using the model provided when new image is passed.

Converts Image.msg to cv2 format (Might need to change this to use cv2.Mat) and uses the model to detect pedestrian
IF visualization is true, will publish an image with pedestrians detected.

Hardcoded values for now:
Detected only pedestrians -> Class = 0
Confidence level -> 0.7

"""

# Use Image directly for GEM Car
# track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)

# Convert to CV2 format for RosBag
bridge = CvBridge()
image = bridge.imgmsg_to_cv2(image, "bgr8")
track_result = self.detector.track(source=image, classes=self.classes_to_detect, persist=True, conf=self.confidence)

self.last_person_boxes = []
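        # track_result[0].boxes holds one entry per tracked detection; each entry
        # exposes .xywh (center x, center y, width, height), .id, and .conf tensors.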
boxes = track_result[0].boxes

# Used for visualization
        if self.visualization:
label_text = "Pedestrian "
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.5
font_color = (255, 255, 255) # White text
outline_color = (0, 0, 0) # Black outline
line_type = 1
text_thickness = 2 # Text thickness
outline_thickness = 1 # Thickness of the text outline

        # Unpack each detected box into x, y, w, h (center coordinates plus size)
        for box in boxes:

            xywh = box.xywh[0].tolist()
            self.last_person_boxes.append(xywh)
            x, y, w, h = xywh
            track_id = box.id.item()

            # Stores AgentState in a dict, can be removed if not required
            pose = ObjectPose(t=0,x=x,y=y,z=0,yaw=0,pitch=0,roll=0,frame=ObjectFrameEnum.CURRENT)
            dims = (w,h,0)
            if track_id not in self.pedestrians:
                self.pedestrians[track_id] = AgentState(pose=pose,dimensions=dims,outline=None,type=AgentEnum.PEDESTRIAN,activity=AgentActivityEnum.MOVING,velocity=(0,0,0),yaw_rate=0)
            else:
                self.pedestrians[track_id].pose = pose
                self.pedestrians[track_id].dimensions = dims

            # Used for visualization
            if self.visualization:
                # Draw bounding box
                cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (255, 0, 255), 3)

                # Define text label anchored at the top-left corner of the box
                x = int(x - w / 2)
                y = int(y - h / 2)
                label = label_text + str(track_id) + " : " + str(round(box.conf.item(), 2))

# Get text size
text_size, baseline = cv2.getTextSize(label, font, font_scale, line_type)
text_w, text_h = text_size

# Position text above the bounding box
text_x = x
text_y = y - 10 if y - 10 > 10 else y + h + text_h

# Draw text outline for better visibility, uncomment for outline
# for dx, dy in [(-1, -1), (-1, 1), (1, -1), (1, 1)]:
# cv2.putText(image, label, (text_x + dx, text_y - baseline + dy), font, font_scale, outline_color, outline_thickness)

# Draw main text on top of the outline
cv2.putText(image, label, (text_x, text_y - baseline), font, font_scale, font_color, text_thickness)


# Used for visualization
        if self.visualization:
ros_img = bridge.cv2_to_imgmsg(image, 'bgr8')
self.pub_image.publish(ros_img)

        # Uncomment to debug the detector...
        # print(self.last_person_boxes)
        # print(self.pedestrians.keys())
        # for bb in self.last_person_boxes:
        #     x,y,w,h = bb
        #     cv2.rectangle(image, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (255, 0, 255), 3)
        # cv2.imwrite("pedestrian_detections.png",image)

    def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
        res = {}
        for i,b in enumerate(self.last_person_boxes):
            res['pedestrian'+str(i)] = box_to_fake_agent(b)
        if len(res) > 0:
            print("Detected",len(res),"pedestrians")
        return res


class FakePedestrianDetector2D(Component):
"""Triggers a pedestrian detection at some random time ranges"""
def __init__(self,vehicle_interface : GEMInterface):
self.vehicle_interface = vehicle_interface
self.times = [(5.0,20.0),(30.0,35.0)]
self.t_start = None

def rate(self):
return 4.0

def state_inputs(self):
return ['vehicle']

def state_outputs(self):
return ['agents']

def update(self, vehicle : VehicleState) -> Dict[str,AgentState]:
if self.t_start is None:
self.t_start = self.vehicle_interface.time()
t = self.vehicle_interface.time() - self.t_start
res = {}
for times in self.times:
if t >= times[0] and t <= times[1]:
res['pedestrian0'] = box_to_fake_agent((0,0,0,0))
print("Detected a pedestrian")
return res
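
A minimal smoke test for the fake detector (hypothetical, not part of this PR): it stubs GEMInterface so no ROS master is needed, assuming time() is the only interface method the fake detector calls.

# Hypothetical smoke test for FakePedestrianDetector2D; not part of this PR.
class _StubInterface:
    """Stands in for GEMInterface; time() is all the fake detector uses."""
    def __init__(self):
        self._t = 0.0
    def time(self) -> float:
        self._t += 0.25  # advance 0.25 s per call, matching the 4 Hz rate
        return self._t

detector = FakePedestrianDetector2D(_StubInterface())
for _ in range(40):
    agents = detector.update(None)  # VehicleState is unused by the fake detector
    # agents contains 'pedestrian0' only inside the (5,20) and (30,35) windows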
84 changes: 0 additions & 84 deletions homework/pedestrian_detection.py

This file was deleted.

48 changes: 0 additions & 48 deletions homework/person_detector.py

This file was deleted.

7 changes: 6 additions & 1 deletion launch/gather_data.yaml
@@ -26,7 +26,12 @@ log:
# If suffix is specified, then logs will output to folder/prefix+suffix. Default uses date and time as the suffix
#suffix : 'test3'
# Specify which ros topics to record to vehicle.bag. Default records nothing. This records the "standard" ROS topics.
ros_topics : []
ros_topics : ['/oak/left/image_raw',
              '/oak/rgb/image_raw',
              '/oak/right/image_raw',
              '/oak/stereo/image_raw',
              '/ouster/points',
              '/livox/lidar']
# Specify options to pass to rosbag record. Default is no options.
#rosbag_options : '--split --size=1024'
# If True, then record all readings / commands of the vehicle interface. Default False
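
As a quick post-run sanity check (hypothetical, not part of this PR; the bag path depends on the configured log folder), the rosbag Python API can confirm the listed topics were actually recorded:

# Hypothetical check that vehicle.bag contains the configured topics.
import rosbag

expected = {'/oak/left/image_raw', '/oak/rgb/image_raw', '/oak/right/image_raw',
            '/oak/stereo/image_raw', '/ouster/points', '/livox/lidar'}
bag_path = 'vehicle.bag'  # adjust to folder/prefix+suffix from the log settings
with rosbag.Bag(bag_path) as bag:
    recorded = set(bag.get_type_and_topic_info().topics.keys())
missing = expected - recorded
if missing:
    print('Missing topics:', sorted(missing))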
File renamed without changes.