From e7483e50529a0f827df612f6dc74e074131ab4e7 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Tue, 23 Sep 2025 10:15:31 +0300 Subject: [PATCH 1/8] feat(stapsdt): Allow compiler to choose argument registers --- tests/does-it-work/src/main.rs | 60 ++++++++++++++++++--- usdt-impl/src/stapsdt.rs | 97 +++++++++++++++++++++++++++++++++- usdt-impl/src/stapsdt/args.rs | 69 ++++-------------------- 3 files changed, 157 insertions(+), 69 deletions(-) diff --git a/tests/does-it-work/src/main.rs b/tests/does-it-work/src/main.rs index acaa5055..a52f1f1f 100644 --- a/tests/does-it-work/src/main.rs +++ b/tests/does-it-work/src/main.rs @@ -245,18 +245,62 @@ mod tests { // Verify the argument types let line = lines.next().expect("Expected a line containing arguments"); let line = line.trim(); - let arguments_line = if cfg!(target_arch = "x86_64") { - "Arguments: 1@%dil 8@%rsi" - } else if cfg!(target_arch = "aarch64") { - "Arguments: 1@x0 8@x1" - } else { - unreachable!("Unsupported Linux target architecture") - }; + let (args_header, args_rest) = line + .split_once(" ") + .expect("Expected arguments line to have one space"); + let (first_arg, second_arg) = args_rest + .split_once(" ") + .expect("Expected arguments line to have two spaces"); + let (first_arg_size, first_arg_register) = first_arg + .split_once("@") + .expect("Expected first argument to have @ sign"); + let (second_arg_size, second_arg_register) = second_arg + .split_once("@") + .expect("Expected first argument to have @ sign"); + assert_eq!( + first_arg_size, "1", + "First argument size appears incorrect: {}", + first_arg_size + ); assert_eq!( - line, arguments_line, + second_arg_size, "8", + "Second argument size appears incorrect: {}", + second_arg_size + ); + assert_eq!( + args_header, "Arguments:", "Arguments line appears incorrect: {}", line ); + if cfg!(target_arch = "x86_64") { + assert!( + ["%ah", "%al", "%bh", "%bl", "%ch", "%cl", "%dh", "%dl", "%dil", "%sil"] + .contains(&first_arg_register), + "First argument register appears incorrect: {}", + first_arg_register + ); + assert!( + ["%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi"].contains(&second_arg_register), + "Second argument register appears incorrect: {}", + second_arg_register + ); + } else if cfg!(target_arch = "aarch64") { + fn is_valid_register(reg: &str) -> bool { + reg.starts_with('x') && str::parse::(®[1..]).is_ok_and(|val| val <= 30) + } + assert!( + is_valid_register(first_arg_register), + "First argument register appears incorrect: {}", + first_arg_register + ); + assert!( + is_valid_register(second_arg_register), + "Second argument register appears incorrect: {}", + second_arg_register + ); + } else { + unreachable!("Unsupported Linux target architecture") + } thr.join().expect("Failed to join test runner thread"); } diff --git a/usdt-impl/src/stapsdt.rs b/usdt-impl/src/stapsdt.rs index a04aaff7..dfc28467 100644 --- a/usdt-impl/src/stapsdt.rs +++ b/usdt-impl/src/stapsdt.rs @@ -22,9 +22,11 @@ #[path = "stapsdt/args.rs"] mod args; +use crate::common::call_argument_closure; use crate::{common, DataType}; use crate::{Probe, Provider}; use args::format_argument; +use dtrace_parser::{BitWidth, Integer}; use proc_macro2::TokenStream; use quote::{format_ident, quote}; use std::convert::TryFrom; @@ -187,12 +189,96 @@ _.stapsdt.base: ) } +// Convert a supported data type to 1. a type to store for the duration of the +// probe invocation and 2. a transformation for compatibility with an asm +// register. +pub fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { + match typ { + DataType::Serializable(_) => ( + // Convert the input to JSON. This is a fallible operation, however, so we wrap the + // data in a result-like JSON blob, mapping the `Result`'s variants to the keys "ok" + // and "err". + quote! { + [ + match ::usdt::to_json(&#input) { + Ok(json) => format!("{{\"ok\":{}}}", json), + Err(e) => format!("{{\"err\":\"{}\"}}", e.to_string()), + }.as_bytes(), + &[0_u8] + ].concat() + }, + quote! { .as_ptr() as usize }, + ), + DataType::Native(dtrace_parser::DataType::String) => ( + quote! { + [(#input.as_ref() as &str).as_bytes(), &[0_u8]].concat() + }, + quote! { .as_ptr() as usize }, + ), + DataType::Native(_) => { + let ty = typ.to_rust_type(); + ( + quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }, + quote! {}, + ) + } + DataType::UniqueId => (quote! { #input.as_u64() as usize }, quote! {}), + } +} + +// Return code to destructure a probe arguments into identifiers, and to pass those to ASM +// registers. +pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { + // SystemTap probes don't have a strict calling convention; they'd take + // even sp-relative addresses. + let (unpacked_args, in_regs): (Vec<_>, Vec<_>) = types + .iter() + .enumerate() + .map(|(i, typ)| { + let arg = format_ident!("arg_{}", i); + let index = syn::Index::from(i); + let input = quote! { args.#index }; + let (value, at_use) = asm_type_convert(typ, input); + + // These values must refer to the actual traced data and prevent it + // from being dropped until after we've completed the probe + // invocation. + let destructured_arg = quote! { + let #arg = #value; + }; + // Here, we convert the argument to store it within a register. + let register_arg = if (cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86")) + && matches!( + typ, + DataType::Native(dtrace_parser::DataType::Integer(Integer { + sign: _, + width: BitWidth::Bit8 + })) + ) { + // x86 has a special "reg_byte" register class for taking in + // bytes. + quote! { #arg = in(reg_byte) (#arg #at_use) } + } else { + quote! { #arg = in(reg) (#arg #at_use) } + }; + (destructured_arg, register_arg) + }) + .unzip(); + let arg_lambda = call_argument_closure(types); + let unpacked_args = quote! { + #arg_lambda + #(#unpacked_args)* + }; + let in_regs = quote! { #(#in_regs,)* }; + (unpacked_args, in_regs) +} + fn compile_probe( provider: &Provider, probe: &Probe, config: &crate::CompileProvidersConfig, ) -> TokenStream { - let (unpacked_args, in_regs) = common::construct_probe_args(&probe.types); + let (unpacked_args, in_regs) = construct_probe_args(&probe.types); let probe_rec = emit_probe_record(&provider.name, &probe.name, Some(&probe.types)); let type_check_fn = common::construct_type_check( &provider.name, @@ -202,6 +288,13 @@ fn compile_probe( ); let sema_name = format_ident!("__usdt_sema_{}_{}", provider.name, probe.name); + let options = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86") { + // Use att_syntax on x86 to generate GNU Assembly Syntax for the + // registers automatically. + quote! { options(att_syntax, nomem, nostack, preserves_flags) } + } else { + quote! { options(nomem, nostack, preserves_flags) } + }; let impl_block = quote! { unsafe extern "C" { // Note: C libraries use a struct containing an unsigned short @@ -226,7 +319,7 @@ fn compile_probe( "990: nop", #probe_rec, #in_regs - options(nomem, nostack, preserves_flags) + #options ); } } diff --git a/usdt-impl/src/stapsdt/args.rs b/usdt-impl/src/stapsdt/args.rs index 58e863a2..a7f7683f 100644 --- a/usdt-impl/src/stapsdt/args.rs +++ b/usdt-impl/src/stapsdt/args.rs @@ -21,7 +21,7 @@ use dtrace_parser::{BitWidth, DataType as NativeDataType, Integer, Sign}; /// that reads the integer's value from the correct register. Effectively this /// means generating a string like `%REG` where `REG` is the register that the /// data is located in. -fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str { +fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> String { // See common.rs for note on argument passing and maximum supported // argument count. assert!( @@ -29,71 +29,22 @@ fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str { "Up to 6 probe arguments are currently supported" ); if cfg!(target_arch = "x86_64") { - match (integer.width, reg_index) { - (BitWidth::Bit8, 0) => "%dil", - (BitWidth::Bit16, 0) => "%di", - (BitWidth::Bit32, 0) => "%edi", - (BitWidth::Bit64, 0) => "%rdi", - (BitWidth::Bit8, 1) => "%sil", - (BitWidth::Bit16, 1) => "%si", - (BitWidth::Bit32, 1) => "%esi", - (BitWidth::Bit64, 1) => "%rsi", - (BitWidth::Bit8, 2) => "%dl", - (BitWidth::Bit16, 2) => "%dx", - (BitWidth::Bit32, 2) => "%edx", - (BitWidth::Bit64, 2) => "%rdx", - (BitWidth::Bit8, 3) => "%cl", - (BitWidth::Bit16, 3) => "%cx", - (BitWidth::Bit32, 3) => "%ecx", - (BitWidth::Bit64, 3) => "%rcx", - (BitWidth::Bit8, 4) => "%r8b", - (BitWidth::Bit16, 4) => "%r8w", - (BitWidth::Bit32, 4) => "%r8d", - (BitWidth::Bit64, 4) => "%r8", - (BitWidth::Bit8, 5) => "%r9b", - (BitWidth::Bit16, 5) => "%r9w", - (BitWidth::Bit32, 5) => "%r9d", - (BitWidth::Bit64, 5) => "%r9", + match integer.width { + BitWidth::Bit8 => format!("{{arg_{reg_index}}}"), + BitWidth::Bit16 => format!("{{arg_{reg_index}:x}}"), + BitWidth::Bit32 => format!("{{arg_{reg_index}:e}}"), + BitWidth::Bit64 => format!("{{arg_{reg_index}:r}}"), #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 0) => "%edi", + BitWidth::Pointer => format!("{{arg_{reg_index}:e}}"), #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 0) => "%rdi", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 1) => "%esi", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 1) => "%rsi", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 2) => "%edx", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 2) => "%rdx", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 3) => "%ecx", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 3) => "%rcx", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 4) => "%e8", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 4) => "%r8", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 5) => "%e9", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 5) => "%r9", + BitWidth::Pointer => format!("{{arg_{reg_index}:r}}"), #[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] - (BitWidth::Pointer, _) => compile_error!("Unsupported pointer width"), - _ => unreachable!(), + BitWidth::Pointer => compile_error!("Unsupported pointer width"), } } else if cfg!(target_arch = "aarch64") { // GNU Assembly syntax for SystemTap only uses the extended register // for some reason. - match reg_index { - 0 => "x0", - 1 => "x1", - 2 => "x2", - 3 => "x3", - 4 => "x4", - 5 => "x5", - _ => unreachable!(), - } + format!("{{arg_{reg_index}:x}}") } else { unreachable!("Unsupported Linux target architecture") } From 09cb2868b23e378b2e518d74eab2486f9c97f718 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 5 Nov 2025 21:45:41 +0200 Subject: [PATCH 2/8] fix: add comment to argument line parsing --- tests/does-it-work/src/main.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/does-it-work/src/main.rs b/tests/does-it-work/src/main.rs index a52f1f1f..dc184b16 100644 --- a/tests/does-it-work/src/main.rs +++ b/tests/does-it-work/src/main.rs @@ -242,7 +242,12 @@ mod tests { semaphore_address ); - // Verify the argument types + // Verify the argument types; the format of the line is eg. + // "Arguments: size@%reg size@%reg", but we cannot know which + // registers the compiler has chosen to use for arguments passing, + // so we have to instead split out the two arguments we expect to + // find for this probe specifically, check their sizes, and match + // the register strings ("%reg") against a list of possible values. let line = lines.next().expect("Expected a line containing arguments"); let line = line.trim(); let (args_header, args_rest) = line From c524e75a8df9781ca0a63a4e83edbe39fde6ebc1 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 5 Nov 2025 21:56:15 +0200 Subject: [PATCH 3/8] fix: use methods from common.rs --- usdt-impl/src/common.rs | 76 +++++++++++++++++++++++++++++++---- usdt-impl/src/stapsdt.rs | 87 +--------------------------------------- 2 files changed, 70 insertions(+), 93 deletions(-) diff --git a/usdt-impl/src/common.rs b/usdt-impl/src/common.rs index e56b8d47..ff72fc64 100644 --- a/usdt-impl/src/common.rs +++ b/usdt-impl/src/common.rs @@ -101,6 +101,26 @@ fn shared_slice_elem_type(reference: &syn::TypeReference) -> Option<&syn::Type> } } +/// Returns true if the DataType requires the special "reg_byte" register class +/// to be used for the `asm!` macro arguments passing. This only happens for +/// STAPSDT probes on x86. +#[cfg(usdt_backend_stapsdt)] +fn use_reg_byte(typ: &DataType) -> bool { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + use dtrace_parser::{BitWidth, Integer}; + matches!( + typ, + DataType::Native(dtrace_parser::DataType::Integer(Integer { + sign: _, + width: BitWidth::Bit8 + })) + ) + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] + false +} + // Return code to destructure a probe arguments into identifiers, and to pass those to ASM // registers. pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { @@ -135,8 +155,27 @@ pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { let #arg = #value; }; // Here, we convert the argument to store it within a register. - let register_arg = quote! { in(#reg) (#arg #at_use) }; - + let register_arg = if cfg!(usdt_backend_stapsdt) { + // In SystemTap probes, the arguments can be passed freely in + // any registers without regard to standard function call ABIs. + // We thus do not need the register names in the STAPSDT + // backend. Intead, we use the "name = in(reg) arg" pattern to + // bind each argument into a local name (we shadow the original + // argument name here): this name can then be used by the + // `asm!` macro to refer to the argument "by register"; the + // compiler will fill in the actual register name at each + // reference point. This does away with a need for the compiler + // to generate code moving arugments around in registers before + // a probe site. + let _ = reg; + if use_reg_byte(typ) { + quote! { #arg = in(reg_byte) (#arg #at_use) } + } else { + quote! { #arg = in(reg) (#arg #at_use) } + } + } else { + quote! { in(#reg) (#arg #at_use) } + }; (destructured_arg, register_arg) }) .unzip(); @@ -164,7 +203,7 @@ pub fn call_argument_closure(types: &[DataType]) -> TokenStream { // Convert a supported data type to 1. a type to store for the duration of the // probe invocation and 2. a transformation for compatibility with an asm // register. -fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { +pub(crate) fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { match typ { DataType::Serializable(_) => ( // Convert the input to JSON. This is a fallible operation, however, so we wrap the @@ -190,6 +229,13 @@ fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenSt DataType::Native(_) => { let ty = typ.to_rust_type(); ( + // For STAPSDT probes, we cannot "widen" the data type at the + // interface, as we've left the register choice up to the + // compiler and the compiler doesn't like mismatched register + // classes and types for single-byte values (reg_byte vs usize). + #[cfg(usdt_backend_stapsdt)] + quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }, + #[cfg(not(usdt_backend_stapsdt))] quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) }, quote! {}, ) @@ -349,11 +395,18 @@ mod tests { #[cfg(target_arch = "aarch64")] let registers = ["x0", "x1"]; let (args, regs) = construct_probe_args(types); + #[cfg(not(usdt_backend_stapsdt))] let expected = quote! { let args = ($args_lambda)(); let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0) as usize); let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat(); }; + #[cfg(usdt_backend_stapsdt)] + let expected = quote! { + let args = ($args_lambda)(); + let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0)); + let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat(); + }; assert_eq!(args.to_string(), expected.to_string()); for (i, (expected, actual)) in registers @@ -361,8 +414,15 @@ mod tests { .zip(regs.to_string().split(',')) .enumerate() { + // We don't need the register names for STAPSDT probes. + #[cfg(usdt_backend_stapsdt)] + let _ = expected; + let reg = actual.replace(' ', ""); + #[cfg(not(usdt_backend_stapsdt))] let expected = format!("in(\"{}\")(arg_{}", expected, i); + #[cfg(usdt_backend_stapsdt)] + let expected = format!("arg_{i}=in(reg)(arg_{i}"); assert!( reg.starts_with(&expected), "reg: {}; expected {}", @@ -382,10 +442,12 @@ mod tests { })), TokenStream::from_str("foo").unwrap(), ); - assert_eq!( - out.to_string(), - quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo) as usize)}.to_string() - ); + #[cfg(usdt_backend_stapsdt)] + let out_expected = quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo))}.to_string(); + #[cfg(not(usdt_backend_stapsdt))] + let out_expected = + quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo) as usize)}.to_string(); + assert_eq!(out.to_string(), out_expected); assert_eq!(post.to_string(), quote! {}.to_string()); let (out, post) = asm_type_convert( diff --git a/usdt-impl/src/stapsdt.rs b/usdt-impl/src/stapsdt.rs index dfc28467..b38cfc5d 100644 --- a/usdt-impl/src/stapsdt.rs +++ b/usdt-impl/src/stapsdt.rs @@ -22,11 +22,10 @@ #[path = "stapsdt/args.rs"] mod args; -use crate::common::call_argument_closure; +use crate::common::construct_probe_args; use crate::{common, DataType}; use crate::{Probe, Provider}; use args::format_argument; -use dtrace_parser::{BitWidth, Integer}; use proc_macro2::TokenStream; use quote::{format_ident, quote}; use std::convert::TryFrom; @@ -189,90 +188,6 @@ _.stapsdt.base: ) } -// Convert a supported data type to 1. a type to store for the duration of the -// probe invocation and 2. a transformation for compatibility with an asm -// register. -pub fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { - match typ { - DataType::Serializable(_) => ( - // Convert the input to JSON. This is a fallible operation, however, so we wrap the - // data in a result-like JSON blob, mapping the `Result`'s variants to the keys "ok" - // and "err". - quote! { - [ - match ::usdt::to_json(&#input) { - Ok(json) => format!("{{\"ok\":{}}}", json), - Err(e) => format!("{{\"err\":\"{}\"}}", e.to_string()), - }.as_bytes(), - &[0_u8] - ].concat() - }, - quote! { .as_ptr() as usize }, - ), - DataType::Native(dtrace_parser::DataType::String) => ( - quote! { - [(#input.as_ref() as &str).as_bytes(), &[0_u8]].concat() - }, - quote! { .as_ptr() as usize }, - ), - DataType::Native(_) => { - let ty = typ.to_rust_type(); - ( - quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }, - quote! {}, - ) - } - DataType::UniqueId => (quote! { #input.as_u64() as usize }, quote! {}), - } -} - -// Return code to destructure a probe arguments into identifiers, and to pass those to ASM -// registers. -pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { - // SystemTap probes don't have a strict calling convention; they'd take - // even sp-relative addresses. - let (unpacked_args, in_regs): (Vec<_>, Vec<_>) = types - .iter() - .enumerate() - .map(|(i, typ)| { - let arg = format_ident!("arg_{}", i); - let index = syn::Index::from(i); - let input = quote! { args.#index }; - let (value, at_use) = asm_type_convert(typ, input); - - // These values must refer to the actual traced data and prevent it - // from being dropped until after we've completed the probe - // invocation. - let destructured_arg = quote! { - let #arg = #value; - }; - // Here, we convert the argument to store it within a register. - let register_arg = if (cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86")) - && matches!( - typ, - DataType::Native(dtrace_parser::DataType::Integer(Integer { - sign: _, - width: BitWidth::Bit8 - })) - ) { - // x86 has a special "reg_byte" register class for taking in - // bytes. - quote! { #arg = in(reg_byte) (#arg #at_use) } - } else { - quote! { #arg = in(reg) (#arg #at_use) } - }; - (destructured_arg, register_arg) - }) - .unzip(); - let arg_lambda = call_argument_closure(types); - let unpacked_args = quote! { - #arg_lambda - #(#unpacked_args)* - }; - let in_regs = quote! { #(#in_regs,)* }; - (unpacked_args, in_regs) -} - fn compile_probe( provider: &Provider, probe: &Probe, From aa2caebf91287f44a6f8f6bec0dcf93f0170a25c Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 5 Nov 2025 23:02:41 +0200 Subject: [PATCH 4/8] fix: update comment on integer_to_asm_op --- usdt-impl/src/stapsdt/args.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/usdt-impl/src/stapsdt/args.rs b/usdt-impl/src/stapsdt/args.rs index a7f7683f..dfcc0b76 100644 --- a/usdt-impl/src/stapsdt/args.rs +++ b/usdt-impl/src/stapsdt/args.rs @@ -17,10 +17,11 @@ use crate::DataType; use dtrace_parser::{BitWidth, DataType as NativeDataType, Integer, Sign}; -/// Convert an Integer type and a register index into a GNU Assembler operation -/// that reads the integer's value from the correct register. Effectively this -/// means generating a string like `%REG` where `REG` is the register that the -/// data is located in. +/// Convert an Integer type and an argument index into a GNU Assembler +/// operation that reads the integer's value from the argument by name. The +/// exact register choice for argument passing is left up to the compiler, +/// meaning that this function generates a string like "{arg_N}" with possible +/// register type/size suffix after the `arg_N`, separated by a colon. fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> String { // See common.rs for note on argument passing and maximum supported // argument count. From dbbc9073027416d64d784a77acfe14c846f1836f Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Wed, 5 Nov 2025 23:08:47 +0200 Subject: [PATCH 5/8] fix: avoid needing to define unused function --- usdt-impl/src/common.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/usdt-impl/src/common.rs b/usdt-impl/src/common.rs index ff72fc64..069bdef1 100644 --- a/usdt-impl/src/common.rs +++ b/usdt-impl/src/common.rs @@ -155,7 +155,8 @@ pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { let #arg = #value; }; // Here, we convert the argument to store it within a register. - let register_arg = if cfg!(usdt_backend_stapsdt) { + #[cfg(usdt_backend_stapsdt)] + let register_arg = { // In SystemTap probes, the arguments can be passed freely in // any registers without regard to standard function call ABIs. // We thus do not need the register names in the STAPSDT @@ -173,9 +174,9 @@ pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { } else { quote! { #arg = in(reg) (#arg #at_use) } } - } else { - quote! { in(#reg) (#arg #at_use) } }; + #[cfg(not(usdt_backend_stapsdt))] + let register_arg = quote! { in(#reg) (#arg #at_use) }; (destructured_arg, register_arg) }) .unzip(); From 896b317d912347fe7726658a3d341cdfeaa52c38 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Tue, 18 Nov 2025 06:58:55 +0200 Subject: [PATCH 6/8] ahl: Reflow for better readability Co-authored-by: Adam Leventhal --- usdt-impl/src/common.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/usdt-impl/src/common.rs b/usdt-impl/src/common.rs index 069bdef1..7cdb488a 100644 --- a/usdt-impl/src/common.rs +++ b/usdt-impl/src/common.rs @@ -229,17 +229,15 @@ pub(crate) fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStre ), DataType::Native(_) => { let ty = typ.to_rust_type(); - ( - // For STAPSDT probes, we cannot "widen" the data type at the - // interface, as we've left the register choice up to the - // compiler and the compiler doesn't like mismatched register - // classes and types for single-byte values (reg_byte vs usize). - #[cfg(usdt_backend_stapsdt)] - quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }, - #[cfg(not(usdt_backend_stapsdt))] - quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) }, - quote! {}, - ) + #[cfg(not(usdt_backend_stapsdt))] + let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) }; + // For STAPSDT probes, we cannot "widen" the data type at the + // interface, as we've left the register choice up to the + // compiler and the compiler doesn't like mismatched register + // classes and types for single-byte values (reg_byte vs usize). + #[cfg(usdt_backend_stapsdt)] + let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }; + (value, quote! {}) } DataType::UniqueId => (quote! { #input.as_u64() as usize }, quote! {}), } From 1621b0ff7293f2c5160770d41a80e3bdbb3992e3 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Tue, 18 Nov 2025 07:38:44 +0200 Subject: [PATCH 7/8] review: add unit tests --- usdt-impl/src/stapsdt/args.rs | 110 ++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 4 deletions(-) diff --git a/usdt-impl/src/stapsdt/args.rs b/usdt-impl/src/stapsdt/args.rs index dfcc0b76..7e878077 100644 --- a/usdt-impl/src/stapsdt/args.rs +++ b/usdt-impl/src/stapsdt/args.rs @@ -18,7 +18,7 @@ use crate::DataType; use dtrace_parser::{BitWidth, DataType as NativeDataType, Integer, Sign}; /// Convert an Integer type and an argument index into a GNU Assembler -/// operation that reads the integer's value from the argument by name. The +/// operand that reads the integer's value from the argument by name. The /// exact register choice for argument passing is left up to the compiler, /// meaning that this function generates a string like "{arg_N}" with possible /// register type/size suffix after the `arg_N`, separated by a colon. @@ -90,8 +90,8 @@ const UNIQUE_ID: Integer = Integer { width: BitWidth::Bit64, }; -/// Convert a type and register index to its GNU Assembler operation as a -/// String. +/// Convert a NativeDataType and register index to its GNU Assembler operand +/// as a String. fn native_data_type_to_asm_op(typ: &NativeDataType, reg_index: u8) -> String { match typ { NativeDataType::Integer(int) => integer_to_asm_op(int, reg_index).into(), @@ -112,7 +112,7 @@ fn native_data_type_to_arg_size(typ: &NativeDataType) -> &'static str { } } -/// Convert a DataType and register index to its GNU Assembler operation as a +/// Convert a DataType and register index to a GNU Assembler operand as a /// String. fn data_type_to_asm_op(typ: &DataType, reg_index: u8) -> String { match typ { @@ -159,3 +159,105 @@ pub(crate) fn format_argument((reg_index, typ): (usize, &DataType)) -> String { data_type_to_asm_op(typ, u8::try_from(reg_index).unwrap()) ) } + +#[cfg(test)] +mod test { + use dtrace_parser::{BitWidth, Integer}; + + use crate::{ + internal::args::{format_argument, integer_to_asm_op}, + DataType, + }; + + fn int(width: BitWidth) -> Integer { + Integer { + sign: dtrace_parser::Sign::Signed, + width, + } + } + + fn uint(width: BitWidth) -> Integer { + Integer { + sign: dtrace_parser::Sign::Unsigned, + width, + } + } + + #[test] + fn integer_to_asm_op_tests() { + if cfg!(target_arch = "x86_64") { + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:e}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:e}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:r}"); + } else if cfg!(target_arch = "aarch64") { + // GNU Assembly syntax for SystemTap only uses the extended register + // for some reason. + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:x}"); + } else { + unreachable!("Unsupported Linux target architecture") + } + } + + #[test] + fn format_argument_tests() { + if cfg!(target_arch = "x86_64") { + assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:r}"); + assert_eq!( + format_argument((4, &DataType::Native(dtrace_parser::DataType::String))), + "8@{arg_4:r}" + ); + assert_eq!( + format_argument(( + 4, + &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32))) + )), + "4@{arg_4:e}" + ); + assert_eq!( + format_argument(( + 3, + &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16))) + )), + "-2@{arg_3:x}" + ); + } else if cfg!(target_arch = "aarch64") { + assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:x}"); + assert_eq!( + format_argument((4, &DataType::Native(dtrace_parser::DataType::String))), + "8@{arg_4:x}" + ); + assert_eq!( + format_argument(( + 4, + &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32))) + )), + "4@{arg_4:x}" + ); + assert_eq!( + format_argument(( + 3, + &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16))) + )), + "-2@{arg_3:x}" + ); + } else { + unreachable!("Unsupported Linux target architecture") + } + } +} From 8e00444a5111f8d92ef1cd2e0715de8e3bf5fdd7 Mon Sep 17 00:00:00 2001 From: Aapo Alasuutari Date: Tue, 18 Nov 2025 07:39:00 +0200 Subject: [PATCH 8/8] review: prose on arguments syntax --- usdt-impl/src/stapsdt.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/usdt-impl/src/stapsdt.rs b/usdt-impl/src/stapsdt.rs index b38cfc5d..ecadb137 100644 --- a/usdt-impl/src/stapsdt.rs +++ b/usdt-impl/src/stapsdt.rs @@ -204,8 +204,28 @@ fn compile_probe( let sema_name = format_ident!("__usdt_sema_{}_{}", provider.name, probe.name); let options = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86") { - // Use att_syntax on x86 to generate GNU Assembly Syntax for the - // registers automatically. + // STAPSDT probes contain an "arguments" string which contains the + // size, type, and location of each argument. This string is expected + // to be in the AT&T syntax: we change the syntax for x86 only, as only + // there the syntax effects register naming. The rest of our inline + // assembly here is the same in both AT&T and Intel syntax, so we can + // freely change the syntax without changing the generating code. + // The arguments string on x86 looks like this: + // * "2@%ax -4@%edi 8f@%rsi" + // and in our inline assembly it is as follows: + // * "2@{arg0} -4@{arg1} 8f@{arg2}" + // The argument size and type is explicitly named on the left side of + // the "@" sign, but the register is given by argument name instead of + // explicitly naming eg. "%ax". This gives the compiler the freedom to + // choose for itself where it wants to place the arguments. The only + // thing we need to make sure of is that the argument register strings + // are in the AT&T syntax: in Intel syntax the the "%" character would + // be missing from the register names. + // Note that we could manually fill in the "%" character and still use + // Intel syntax, but that will break down if Rust's inline assembly + // ever gets memory operands. Memory operands in the syntax look like: + // * "8@0x18(%rsp)" + // for "8-byte unsigned integer at stack + 0x18". quote! { options(att_syntax, nomem, nostack, preserves_flags) } } else { quote! { options(nomem, nostack, preserves_flags) }