From 535fd8c8799ea9bddf91ce50c79c84a079a0519b Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Fri, 19 Dec 2025 18:07:34 +0100 Subject: [PATCH] feat: improve handling of path buffers Since paths can be in non UTF-8 formats, we should strive to use types that are able to handle this situation when possible. In the case of the exe_path and filename fields that come from the kernel, handling has been changed to treat them as binary blobs (same way the kernel d_path function does) and the length is now being sent to userspace. With this change, userspace is able to reconstruct a path in a fully safe way from the path buffer, treating it as a slice and using the first len - 1 (exclude the null terminator) bytes. This way we are able to preserve the path all the way to the gRPC event, which requires UTF-8 compliant strings. In a future change we might want to change path handling in our protobuffs to use byte blobs instead, but this will probably also require an effort from front end to properly display the data. --- fact-ebpf/src/bpf/bound_path.h | 1 + fact-ebpf/src/bpf/d_path.h | 2 +- fact-ebpf/src/bpf/events.h | 5 +++-- fact-ebpf/src/bpf/main.c | 4 ++-- fact-ebpf/src/bpf/process.h | 2 +- fact-ebpf/src/bpf/types.h | 2 ++ fact/src/event/mod.rs | 26 ++++++++++++++++++++------ fact/src/event/process.rs | 18 ++++++++---------- 8 files changed, 38 insertions(+), 22 deletions(-) diff --git a/fact-ebpf/src/bpf/bound_path.h b/fact-ebpf/src/bpf/bound_path.h index 1c9430cf..9c0870f1 100644 --- a/fact-ebpf/src/bpf/bound_path.h +++ b/fact-ebpf/src/bpf/bound_path.h @@ -69,6 +69,7 @@ __always_inline static enum path_append_status_t path_append_dentry(struct bound path->len += len; path_write_char(path->path, path->len, '\0'); + path->len++; return 0; } diff --git a/fact-ebpf/src/bpf/d_path.h b/fact-ebpf/src/bpf/d_path.h index 506b1c06..c064ba1b 100644 --- a/fact-ebpf/src/bpf/d_path.h +++ b/fact-ebpf/src/bpf/d_path.h @@ -77,7 +77,7 @@ __always_inline static long __d_path(const struct path* path, char* buf, int buf dentry = parent; } - bpf_probe_read_str(buf, buflen, &helper->buf[offset]); + bpf_probe_read(buf, buflen - offset, &helper->buf[offset]); return buflen - offset; } diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index ab027bb4..f4e6ff4b 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -13,7 +13,7 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, file_activity_type_t event_type, - const char filename[PATH_MAX], + struct bound_path_t* path, inode_key_t* inode, bool use_bpf_d_path) { struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0); @@ -25,7 +25,8 @@ __always_inline static void submit_event(struct metrics_by_hook_t* m, event->type = event_type; event->timestamp = bpf_ktime_get_boot_ns(); inode_copy_or_reset(&event->inode, inode); - bpf_probe_read_str(event->filename, PATH_MAX, filename); + bpf_probe_read(event->filename, path->len & (PATH_MAX - 1), path->path); + event->filename_len = path->len; struct helper_t* helper = get_helper(); if (helper == NULL) { diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index 19b6ea49..e308efbf 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -56,7 +56,7 @@ int BPF_PROG(trace_file_open, struct file* file) { break; } - submit_event(&m->file_open, event_type, path->path, &inode_key, true); + submit_event(&m->file_open, event_type, path, &inode_key, true); return 0; @@ -116,7 +116,7 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { submit_event(&m->path_unlink, FILE_ACTIVITY_UNLINK, - path->path, + path, &inode_key, path_unlink_supports_bpf_d_path); return 0; diff --git a/fact-ebpf/src/bpf/process.h b/fact-ebpf/src/bpf/process.h index 5061ee38..340c4717 100644 --- a/fact-ebpf/src/bpf/process.h +++ b/fact-ebpf/src/bpf/process.h @@ -131,7 +131,7 @@ __always_inline static int64_t process_fill(process_t* p, bool use_bpf_d_path) { return -1; } - d_path(&task->mm->exe_file->f_path, p->exe_path, PATH_MAX, use_bpf_d_path); + p->exe_path_len = d_path(&task->mm->exe_file->f_path, p->exe_path, PATH_MAX, use_bpf_d_path); const char* cg = get_memory_cgroup(helper); if (cg != NULL) { diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 17c1f43b..735aeab2 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -22,6 +22,7 @@ typedef struct process_t { char args[4096]; unsigned int args_len; char exe_path[PATH_MAX]; + short unsigned int exe_path_len; char memory_cgroup[PATH_MAX]; unsigned int uid; unsigned int gid; @@ -53,6 +54,7 @@ struct event_t { unsigned long timestamp; process_t process; char filename[PATH_MAX]; + short unsigned int filename_len; inode_key_t inode; file_activity_type_t type; }; diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 205b6339..f2b419ab 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -1,16 +1,28 @@ #[cfg(test)] use std::time::{SystemTime, UNIX_EPOCH}; -use std::{ffi::CStr, os::raw::c_char, path::PathBuf}; +use std::{ + ffi::{CStr, OsString}, + os::{raw::c_char, unix::ffi::OsStringExt}, + path::PathBuf, +}; use serde::Serialize; -use fact_ebpf::{event_t, file_activity_type_t, inode_key_t, PATH_MAX}; +use fact_ebpf::{event_t, file_activity_type_t, inode_key_t}; use crate::host_info; use process::Process; pub(crate) mod process; +fn slice_to_pathbuf(s: &[c_char]) -> PathBuf { + #[cfg(target_arch = "x86_64")] + let v = s.iter().map(|c| *c as u8).collect::>(); + #[cfg(not(target_arch = "x86_64"))] + let v = s.to_vec(); + OsString::from_vec(v).into() +} + fn slice_to_string(s: &[c_char]) -> anyhow::Result { Ok(unsafe { CStr::from_ptr(s.as_ptr()) }.to_str()?.to_owned()) } @@ -85,7 +97,9 @@ impl TryFrom<&event_t> for Event { fn try_from(value: &event_t) -> Result { let process = Process::try_from(value.process)?; let timestamp = host_info::get_boot_time() + value.timestamp; - let file = FileData::new(value.type_, value.filename, value.inode)?; + let filename_len = value.filename_len as usize; + let (filename, _) = value.filename.as_slice().split_at(filename_len - 1); + let file = FileData::new(value.type_, filename, value.inode)?; Ok(Event { timestamp, @@ -128,7 +142,7 @@ pub enum FileData { impl FileData { pub fn new( event_type: file_activity_type_t, - filename: [c_char; PATH_MAX as usize], + filename: &[c_char], inode: inode_key_t, ) -> anyhow::Result { let inner = BaseFileData::new(filename, inode)?; @@ -185,8 +199,8 @@ pub struct BaseFileData { } impl BaseFileData { - pub fn new(filename: [c_char; PATH_MAX as usize], inode: inode_key_t) -> anyhow::Result { - let filename = slice_to_string(&filename)?.into(); + pub fn new(filename: &[c_char], inode: inode_key_t) -> anyhow::Result { + let filename = slice_to_pathbuf(filename); Ok(BaseFileData { filename, diff --git a/fact/src/event/process.rs b/fact/src/event/process.rs index aeb8ea5c..d8dc7487 100644 --- a/fact/src/event/process.rs +++ b/fact/src/event/process.rs @@ -1,4 +1,4 @@ -use std::ffi::CStr; +use std::{ffi::CStr, path::PathBuf}; use fact_ebpf::{lineage_t, process_t}; use serde::Serialize; @@ -6,7 +6,7 @@ use uuid::Uuid; use crate::host_info; -use super::slice_to_string; +use super::{slice_to_pathbuf, slice_to_string}; #[derive(Debug, Clone, Default, Serialize)] pub struct Lineage { @@ -48,7 +48,7 @@ impl From for fact_api::process_signal::LineageInfo { pub struct Process { comm: String, args: Vec, - exe_path: String, + exe_path: PathBuf, container_id: Option, uid: u32, username: &'static str, @@ -66,11 +66,7 @@ impl Process { pub fn current() -> Self { use crate::host_info::{get_host_mount_ns, get_mount_ns}; - let exe_path = std::env::current_exe() - .expect("Failed to get current exe") - .into_os_string() - .into_string() - .unwrap(); + let exe_path = std::env::current_exe().expect("Failed to get current exe"); let args = std::env::args().collect::>(); let cgroup = std::fs::read_to_string("/proc/self/cgroup").expect("Failed to read cgroup"); let container_id = Process::extract_container_id(&cgroup); @@ -142,7 +138,9 @@ impl TryFrom for Process { fn try_from(value: process_t) -> Result { let comm = slice_to_string(value.comm.as_slice())?; - let exe_path = slice_to_string(value.exe_path.as_slice())?; + let exe_path_len = value.exe_path_len as usize; + let (exe_path, _) = value.exe_path.as_slice().split_at(exe_path_len - 1); + let exe_path = slice_to_pathbuf(exe_path); let memory_cgroup = unsafe { CStr::from_ptr(value.memory_cgroup.as_ptr()) }.to_str()?; let container_id = Process::extract_container_id(memory_cgroup); let in_root_mount_ns = value.in_root_mount_ns != 0; @@ -213,7 +211,7 @@ impl From for fact_api::ProcessSignal { creation_time: None, name: comm, args, - exec_file_path: exe_path, + exec_file_path: exe_path.to_string_lossy().into_owned(), pid, uid, gid,