diff --git a/tests/does-it-work/src/main.rs b/tests/does-it-work/src/main.rs index acaa5055..dc184b16 100644 --- a/tests/does-it-work/src/main.rs +++ b/tests/does-it-work/src/main.rs @@ -242,21 +242,70 @@ mod tests { semaphore_address ); - // Verify the argument types + // Verify the argument types; the format of the line is eg. + // "Arguments: size@%reg size@%reg", but we cannot know which + // registers the compiler has chosen to use for arguments passing, + // so we have to instead split out the two arguments we expect to + // find for this probe specifically, check their sizes, and match + // the register strings ("%reg") against a list of possible values. let line = lines.next().expect("Expected a line containing arguments"); let line = line.trim(); - let arguments_line = if cfg!(target_arch = "x86_64") { - "Arguments: 1@%dil 8@%rsi" - } else if cfg!(target_arch = "aarch64") { - "Arguments: 1@x0 8@x1" - } else { - unreachable!("Unsupported Linux target architecture") - }; + let (args_header, args_rest) = line + .split_once(" ") + .expect("Expected arguments line to have one space"); + let (first_arg, second_arg) = args_rest + .split_once(" ") + .expect("Expected arguments line to have two spaces"); + let (first_arg_size, first_arg_register) = first_arg + .split_once("@") + .expect("Expected first argument to have @ sign"); + let (second_arg_size, second_arg_register) = second_arg + .split_once("@") + .expect("Expected first argument to have @ sign"); + assert_eq!( + first_arg_size, "1", + "First argument size appears incorrect: {}", + first_arg_size + ); assert_eq!( - line, arguments_line, + second_arg_size, "8", + "Second argument size appears incorrect: {}", + second_arg_size + ); + assert_eq!( + args_header, "Arguments:", "Arguments line appears incorrect: {}", line ); + if cfg!(target_arch = "x86_64") { + assert!( + ["%ah", "%al", "%bh", "%bl", "%ch", "%cl", "%dh", "%dl", "%dil", "%sil"] + .contains(&first_arg_register), + "First argument register appears incorrect: {}", + first_arg_register + ); + assert!( + ["%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi"].contains(&second_arg_register), + "Second argument register appears incorrect: {}", + second_arg_register + ); + } else if cfg!(target_arch = "aarch64") { + fn is_valid_register(reg: &str) -> bool { + reg.starts_with('x') && str::parse::(®[1..]).is_ok_and(|val| val <= 30) + } + assert!( + is_valid_register(first_arg_register), + "First argument register appears incorrect: {}", + first_arg_register + ); + assert!( + is_valid_register(second_arg_register), + "Second argument register appears incorrect: {}", + second_arg_register + ); + } else { + unreachable!("Unsupported Linux target architecture") + } thr.join().expect("Failed to join test runner thread"); } diff --git a/usdt-impl/src/common.rs b/usdt-impl/src/common.rs index e56b8d47..7cdb488a 100644 --- a/usdt-impl/src/common.rs +++ b/usdt-impl/src/common.rs @@ -101,6 +101,26 @@ fn shared_slice_elem_type(reference: &syn::TypeReference) -> Option<&syn::Type> } } +/// Returns true if the DataType requires the special "reg_byte" register class +/// to be used for the `asm!` macro arguments passing. This only happens for +/// STAPSDT probes on x86. +#[cfg(usdt_backend_stapsdt)] +fn use_reg_byte(typ: &DataType) -> bool { + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + { + use dtrace_parser::{BitWidth, Integer}; + matches!( + typ, + DataType::Native(dtrace_parser::DataType::Integer(Integer { + sign: _, + width: BitWidth::Bit8 + })) + ) + } + #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] + false +} + // Return code to destructure a probe arguments into identifiers, and to pass those to ASM // registers. pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { @@ -135,8 +155,28 @@ pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) { let #arg = #value; }; // Here, we convert the argument to store it within a register. + #[cfg(usdt_backend_stapsdt)] + let register_arg = { + // In SystemTap probes, the arguments can be passed freely in + // any registers without regard to standard function call ABIs. + // We thus do not need the register names in the STAPSDT + // backend. Intead, we use the "name = in(reg) arg" pattern to + // bind each argument into a local name (we shadow the original + // argument name here): this name can then be used by the + // `asm!` macro to refer to the argument "by register"; the + // compiler will fill in the actual register name at each + // reference point. This does away with a need for the compiler + // to generate code moving arugments around in registers before + // a probe site. + let _ = reg; + if use_reg_byte(typ) { + quote! { #arg = in(reg_byte) (#arg #at_use) } + } else { + quote! { #arg = in(reg) (#arg #at_use) } + } + }; + #[cfg(not(usdt_backend_stapsdt))] let register_arg = quote! { in(#reg) (#arg #at_use) }; - (destructured_arg, register_arg) }) .unzip(); @@ -164,7 +204,7 @@ pub fn call_argument_closure(types: &[DataType]) -> TokenStream { // Convert a supported data type to 1. a type to store for the duration of the // probe invocation and 2. a transformation for compatibility with an asm // register. -fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { +pub(crate) fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) { match typ { DataType::Serializable(_) => ( // Convert the input to JSON. This is a fallible operation, however, so we wrap the @@ -189,10 +229,15 @@ fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenSt ), DataType::Native(_) => { let ty = typ.to_rust_type(); - ( - quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) }, - quote! {}, - ) + #[cfg(not(usdt_backend_stapsdt))] + let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) }; + // For STAPSDT probes, we cannot "widen" the data type at the + // interface, as we've left the register choice up to the + // compiler and the compiler doesn't like mismatched register + // classes and types for single-byte values (reg_byte vs usize). + #[cfg(usdt_backend_stapsdt)] + let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) }; + (value, quote! {}) } DataType::UniqueId => (quote! { #input.as_u64() as usize }, quote! {}), } @@ -349,11 +394,18 @@ mod tests { #[cfg(target_arch = "aarch64")] let registers = ["x0", "x1"]; let (args, regs) = construct_probe_args(types); + #[cfg(not(usdt_backend_stapsdt))] let expected = quote! { let args = ($args_lambda)(); let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0) as usize); let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat(); }; + #[cfg(usdt_backend_stapsdt)] + let expected = quote! { + let args = ($args_lambda)(); + let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0)); + let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat(); + }; assert_eq!(args.to_string(), expected.to_string()); for (i, (expected, actual)) in registers @@ -361,8 +413,15 @@ mod tests { .zip(regs.to_string().split(',')) .enumerate() { + // We don't need the register names for STAPSDT probes. + #[cfg(usdt_backend_stapsdt)] + let _ = expected; + let reg = actual.replace(' ', ""); + #[cfg(not(usdt_backend_stapsdt))] let expected = format!("in(\"{}\")(arg_{}", expected, i); + #[cfg(usdt_backend_stapsdt)] + let expected = format!("arg_{i}=in(reg)(arg_{i}"); assert!( reg.starts_with(&expected), "reg: {}; expected {}", @@ -382,10 +441,12 @@ mod tests { })), TokenStream::from_str("foo").unwrap(), ); - assert_eq!( - out.to_string(), - quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo) as usize)}.to_string() - ); + #[cfg(usdt_backend_stapsdt)] + let out_expected = quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo))}.to_string(); + #[cfg(not(usdt_backend_stapsdt))] + let out_expected = + quote! {(*<_ as ::std::borrow::Borrow>::borrow(&foo) as usize)}.to_string(); + assert_eq!(out.to_string(), out_expected); assert_eq!(post.to_string(), quote! {}.to_string()); let (out, post) = asm_type_convert( diff --git a/usdt-impl/src/stapsdt.rs b/usdt-impl/src/stapsdt.rs index a04aaff7..ecadb137 100644 --- a/usdt-impl/src/stapsdt.rs +++ b/usdt-impl/src/stapsdt.rs @@ -22,6 +22,7 @@ #[path = "stapsdt/args.rs"] mod args; +use crate::common::construct_probe_args; use crate::{common, DataType}; use crate::{Probe, Provider}; use args::format_argument; @@ -192,7 +193,7 @@ fn compile_probe( probe: &Probe, config: &crate::CompileProvidersConfig, ) -> TokenStream { - let (unpacked_args, in_regs) = common::construct_probe_args(&probe.types); + let (unpacked_args, in_regs) = construct_probe_args(&probe.types); let probe_rec = emit_probe_record(&provider.name, &probe.name, Some(&probe.types)); let type_check_fn = common::construct_type_check( &provider.name, @@ -202,6 +203,33 @@ fn compile_probe( ); let sema_name = format_ident!("__usdt_sema_{}_{}", provider.name, probe.name); + let options = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86") { + // STAPSDT probes contain an "arguments" string which contains the + // size, type, and location of each argument. This string is expected + // to be in the AT&T syntax: we change the syntax for x86 only, as only + // there the syntax effects register naming. The rest of our inline + // assembly here is the same in both AT&T and Intel syntax, so we can + // freely change the syntax without changing the generating code. + // The arguments string on x86 looks like this: + // * "2@%ax -4@%edi 8f@%rsi" + // and in our inline assembly it is as follows: + // * "2@{arg0} -4@{arg1} 8f@{arg2}" + // The argument size and type is explicitly named on the left side of + // the "@" sign, but the register is given by argument name instead of + // explicitly naming eg. "%ax". This gives the compiler the freedom to + // choose for itself where it wants to place the arguments. The only + // thing we need to make sure of is that the argument register strings + // are in the AT&T syntax: in Intel syntax the the "%" character would + // be missing from the register names. + // Note that we could manually fill in the "%" character and still use + // Intel syntax, but that will break down if Rust's inline assembly + // ever gets memory operands. Memory operands in the syntax look like: + // * "8@0x18(%rsp)" + // for "8-byte unsigned integer at stack + 0x18". + quote! { options(att_syntax, nomem, nostack, preserves_flags) } + } else { + quote! { options(nomem, nostack, preserves_flags) } + }; let impl_block = quote! { unsafe extern "C" { // Note: C libraries use a struct containing an unsigned short @@ -226,7 +254,7 @@ fn compile_probe( "990: nop", #probe_rec, #in_regs - options(nomem, nostack, preserves_flags) + #options ); } } diff --git a/usdt-impl/src/stapsdt/args.rs b/usdt-impl/src/stapsdt/args.rs index 58e863a2..7e878077 100644 --- a/usdt-impl/src/stapsdt/args.rs +++ b/usdt-impl/src/stapsdt/args.rs @@ -17,11 +17,12 @@ use crate::DataType; use dtrace_parser::{BitWidth, DataType as NativeDataType, Integer, Sign}; -/// Convert an Integer type and a register index into a GNU Assembler operation -/// that reads the integer's value from the correct register. Effectively this -/// means generating a string like `%REG` where `REG` is the register that the -/// data is located in. -fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str { +/// Convert an Integer type and an argument index into a GNU Assembler +/// operand that reads the integer's value from the argument by name. The +/// exact register choice for argument passing is left up to the compiler, +/// meaning that this function generates a string like "{arg_N}" with possible +/// register type/size suffix after the `arg_N`, separated by a colon. +fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> String { // See common.rs for note on argument passing and maximum supported // argument count. assert!( @@ -29,71 +30,22 @@ fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str { "Up to 6 probe arguments are currently supported" ); if cfg!(target_arch = "x86_64") { - match (integer.width, reg_index) { - (BitWidth::Bit8, 0) => "%dil", - (BitWidth::Bit16, 0) => "%di", - (BitWidth::Bit32, 0) => "%edi", - (BitWidth::Bit64, 0) => "%rdi", - (BitWidth::Bit8, 1) => "%sil", - (BitWidth::Bit16, 1) => "%si", - (BitWidth::Bit32, 1) => "%esi", - (BitWidth::Bit64, 1) => "%rsi", - (BitWidth::Bit8, 2) => "%dl", - (BitWidth::Bit16, 2) => "%dx", - (BitWidth::Bit32, 2) => "%edx", - (BitWidth::Bit64, 2) => "%rdx", - (BitWidth::Bit8, 3) => "%cl", - (BitWidth::Bit16, 3) => "%cx", - (BitWidth::Bit32, 3) => "%ecx", - (BitWidth::Bit64, 3) => "%rcx", - (BitWidth::Bit8, 4) => "%r8b", - (BitWidth::Bit16, 4) => "%r8w", - (BitWidth::Bit32, 4) => "%r8d", - (BitWidth::Bit64, 4) => "%r8", - (BitWidth::Bit8, 5) => "%r9b", - (BitWidth::Bit16, 5) => "%r9w", - (BitWidth::Bit32, 5) => "%r9d", - (BitWidth::Bit64, 5) => "%r9", + match integer.width { + BitWidth::Bit8 => format!("{{arg_{reg_index}}}"), + BitWidth::Bit16 => format!("{{arg_{reg_index}:x}}"), + BitWidth::Bit32 => format!("{{arg_{reg_index}:e}}"), + BitWidth::Bit64 => format!("{{arg_{reg_index}:r}}"), #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 0) => "%edi", + BitWidth::Pointer => format!("{{arg_{reg_index}:e}}"), #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 0) => "%rdi", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 1) => "%esi", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 1) => "%rsi", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 2) => "%edx", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 2) => "%rdx", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 3) => "%ecx", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 3) => "%rcx", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 4) => "%e8", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 4) => "%r8", - #[cfg(target_pointer_width = "32")] - (BitWidth::Pointer, 5) => "%e9", - #[cfg(target_pointer_width = "64")] - (BitWidth::Pointer, 5) => "%r9", + BitWidth::Pointer => format!("{{arg_{reg_index}:r}}"), #[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] - (BitWidth::Pointer, _) => compile_error!("Unsupported pointer width"), - _ => unreachable!(), + BitWidth::Pointer => compile_error!("Unsupported pointer width"), } } else if cfg!(target_arch = "aarch64") { // GNU Assembly syntax for SystemTap only uses the extended register // for some reason. - match reg_index { - 0 => "x0", - 1 => "x1", - 2 => "x2", - 3 => "x3", - 4 => "x4", - 5 => "x5", - _ => unreachable!(), - } + format!("{{arg_{reg_index}:x}}") } else { unreachable!("Unsupported Linux target architecture") } @@ -138,8 +90,8 @@ const UNIQUE_ID: Integer = Integer { width: BitWidth::Bit64, }; -/// Convert a type and register index to its GNU Assembler operation as a -/// String. +/// Convert a NativeDataType and register index to its GNU Assembler operand +/// as a String. fn native_data_type_to_asm_op(typ: &NativeDataType, reg_index: u8) -> String { match typ { NativeDataType::Integer(int) => integer_to_asm_op(int, reg_index).into(), @@ -160,7 +112,7 @@ fn native_data_type_to_arg_size(typ: &NativeDataType) -> &'static str { } } -/// Convert a DataType and register index to its GNU Assembler operation as a +/// Convert a DataType and register index to a GNU Assembler operand as a /// String. fn data_type_to_asm_op(typ: &DataType, reg_index: u8) -> String { match typ { @@ -207,3 +159,105 @@ pub(crate) fn format_argument((reg_index, typ): (usize, &DataType)) -> String { data_type_to_asm_op(typ, u8::try_from(reg_index).unwrap()) ) } + +#[cfg(test)] +mod test { + use dtrace_parser::{BitWidth, Integer}; + + use crate::{ + internal::args::{format_argument, integer_to_asm_op}, + DataType, + }; + + fn int(width: BitWidth) -> Integer { + Integer { + sign: dtrace_parser::Sign::Signed, + width, + } + } + + fn uint(width: BitWidth) -> Integer { + Integer { + sign: dtrace_parser::Sign::Unsigned, + width, + } + } + + #[test] + fn integer_to_asm_op_tests() { + if cfg!(target_arch = "x86_64") { + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:e}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:e}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:r}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:r}"); + } else if cfg!(target_arch = "aarch64") { + // GNU Assembly syntax for SystemTap only uses the extended register + // for some reason. + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:x}"); + assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:x}"); + } else { + unreachable!("Unsupported Linux target architecture") + } + } + + #[test] + fn format_argument_tests() { + if cfg!(target_arch = "x86_64") { + assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:r}"); + assert_eq!( + format_argument((4, &DataType::Native(dtrace_parser::DataType::String))), + "8@{arg_4:r}" + ); + assert_eq!( + format_argument(( + 4, + &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32))) + )), + "4@{arg_4:e}" + ); + assert_eq!( + format_argument(( + 3, + &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16))) + )), + "-2@{arg_3:x}" + ); + } else if cfg!(target_arch = "aarch64") { + assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:x}"); + assert_eq!( + format_argument((4, &DataType::Native(dtrace_parser::DataType::String))), + "8@{arg_4:x}" + ); + assert_eq!( + format_argument(( + 4, + &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32))) + )), + "4@{arg_4:x}" + ); + assert_eq!( + format_argument(( + 3, + &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16))) + )), + "-2@{arg_3:x}" + ); + } else { + unreachable!("Unsupported Linux target architecture") + } + } +}