From 8ad22415c00550d82c2738a448e822ee978b6887 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Thu, 9 Oct 2025 15:06:27 +0200 Subject: [PATCH] ROX-30302: implement heap profiler This new heap profiler is based on jemalloc and is handled directly in the endpoints module. The implementation is not complete and requires that we replace the allocator we use with jemalloc, which was archived a few months ago, so we still need to talk with the wider team to decide whether this is something we want to pursue. To keep things clean, the profiler is gated behind a compile-time feature, so if we decide to go with this implementation, we can at least decide when we want to replace the default allocator. --- Cargo.lock | 39 +++++++++++++++++++ Cargo.toml | 2 + fact/Cargo.toml | 4 ++ fact/src/endpoints.rs | 90 ++++++++++++++++++++++++++++++++++++++++++- fact/src/main.rs | 8 ++++ 5 files changed, 142 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f7e85890..686815c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -530,6 +530,8 @@ dependencies = [ "serde", "serde_json", "tempfile", + "tikv-jemalloc-ctl", + "tikv-jemallocator", "tokio", "tokio-stream", "tonic", @@ -1087,6 +1089,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1677,6 +1685,37 @@ dependencies = [ "syn", ] +[[package]] +name = "tikv-jemalloc-ctl" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b" +dependencies = [ + "libc", + "paste", + "tikv-jemalloc-sys", +] + +[[package]] +name = "tikv-jemalloc-sys" +version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" +source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "tikv-jemallocator" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865" +dependencies = [ + "libc", + "tikv-jemalloc-sys", +] + [[package]] name = "tokio" version = "1.46.0" diff --git a/Cargo.toml b/Cargo.toml index 72567501..d9f5a9c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,8 @@ pprof = { version = "0.15.0", features = ["prost-codec"] } prometheus-client = { version = "0.24.0", default-features = false } prost = "0.13.5" prost-types = "0.13.5" +tikv-jemalloc-ctl = { version = "0.6.0", features = ["profiling"] } +tikv-jemallocator = { version = "0.6.0", features = ["profiling", "unprefixed_malloc_on_supported_platforms"] } serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.142" tokio = { version = "1.40.0", default-features = false, features = [ diff --git a/fact/Cargo.toml b/fact/Cargo.toml index 6552eb9a..c284e16f 100644 --- a/fact/Cargo.toml +++ b/fact/Cargo.toml @@ -22,6 +22,9 @@ pprof = { workspace = true } prometheus-client = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } +tempfile = { workspace = true, optional = true} +tikv-jemalloc-ctl = { workspace = true, optional = true } +tikv-jemallocator = { workspace = true, optional = true } serde = { workspace = true } serde_json = { workspace = true } uuid = { workspace = true } @@ -42,3 +45,4 @@ path = "src/main.rs" [features] bpf-test = [] +jemalloc = ["tikv-jemallocator", "tikv-jemalloc-ctl", "tempfile"] diff --git a/fact/src/endpoints.rs b/fact/src/endpoints.rs index 5a1a557f..51c0caeb 100644 --- a/fact/src/endpoints.rs +++ b/fact/src/endpoints.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "jemalloc")] +use std::ffi::CString; use std::{future::Future, net::SocketAddr, pin::Pin}; use 
http_body_util::{BodyExt, Full}; @@ -9,6 +11,10 @@ use hyper::{ }; use hyper_util::rt::TokioIo; use log::{info, warn}; +#[cfg(feature = "jemalloc")] +use tempfile::NamedTempFile; +#[cfg(feature = "jemalloc")] +use tikv_jemalloc_ctl::raw as mallctl; use tokio::{net::TcpListener, sync::watch, task::JoinHandle}; use crate::metrics::exporter::Exporter; @@ -148,7 +154,7 @@ impl Server { Ok(_) => Server::response_with_content_type( StatusCode::OK, "text/plain", - "CPU profiler starter", + "CPU profiler started", ), Err(e) => Server::response( StatusCode::INTERNAL_SERVER_ERROR, @@ -190,6 +196,79 @@ impl Server { ), } } + + #[cfg(feature = "jemalloc")] + async fn handle_heap_profiler(&self, body: Incoming) -> ServerResponse { + let body = match body.collect().await { + Ok(b) => b.to_bytes(), + Err(e) => { + return Server::response( + StatusCode::BAD_REQUEST, + format!("Failed to read request body: {e}"), + ) + } + }; + + let state = if body == "on" { + true + } else if body == "off" { + false + } else { + return Server::response( + StatusCode::BAD_REQUEST, + format!("Invalid request body: {body:?}"), + ); + }; + + let res = unsafe { mallctl::update(b"prof.active\0", state) }; + + match res { + Ok(_) => Server::response_with_content_type( + StatusCode::OK, + "text/plain", + format!( + "Heap profiler {}", + if state { "started" } else { "stopped" } + ), + ), + Err(e) => Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!( + "Failed to {} heap profiler: {e}", + if state { "start" } else { "stop" } + ), + ), + } + } + + #[cfg(feature = "jemalloc")] + async fn handle_heap_report(&self) -> ServerResponse { + let f = match NamedTempFile::new() { + Ok(f) => f, + Err(e) => { + return Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to create dump file: {e}"), + ); + } + }; + let path = CString::new(f.path().as_os_str().as_encoded_bytes()).unwrap(); + + if let Err(e) = unsafe { mallctl::write(b"prof.dump\0", path.as_ptr()) } { + return
Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to dump heap profile: {e}"), + ); + } + + match std::fs::read_to_string(f.path()) { + Ok(profile) => Server::response(StatusCode::OK, profile), + Err(e) => Server::response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to read heap profile dump: {e}"), + ), + } + } } impl Service<Request<Incoming>> for Server { @@ -205,6 +284,15 @@ impl Service<Request<Incoming>> for Server { (&Method::GET, "/health_check") => s.handle_health_check(), (&Method::POST, "/profile/cpu") => s.handle_cpu_profiler(req.into_body()).await, (&Method::GET, "/profile/cpu") => s.handle_cpu_report().await, + #[cfg(feature = "jemalloc")] + (&Method::POST, "/profile/heap") => s.handle_heap_profiler(req.into_body()).await, + #[cfg(feature = "jemalloc")] + (&Method::GET, "/profile/heap") => s.handle_heap_report().await, + #[cfg(not(feature = "jemalloc"))] + (_, "/profile/heap") => Server::response( + StatusCode::SERVICE_UNAVAILABLE, + "Heap profiler not supported", + ), (&Method::GET, "/profile") => s.handle_profiler_status().await, _ => Server::response(StatusCode::NOT_FOUND, ""), } diff --git a/fact/src/main.rs b/fact/src/main.rs index 7f1efb81..19826a87 100644 --- a/fact/src/main.rs +++ b/fact/src/main.rs @@ -1,5 +1,13 @@ use fact::config::FactConfig; +#[cfg(feature = "jemalloc")] +#[global_allocator] +static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +#[cfg(feature = "jemalloc")] +#[allow(non_upper_case_globals)] +#[export_name = "malloc_conf"] +pub static malloc_conf: &[u8] = b"prof:true,prof_active:false\0"; + #[tokio::main] async fn main() -> anyhow::Result<()> { fact::init_log()?;