diff --git a/tests/does-it-work/src/main.rs b/tests/does-it-work/src/main.rs
index acaa5055..dc184b16 100644
--- a/tests/does-it-work/src/main.rs
+++ b/tests/does-it-work/src/main.rs
@@ -242,21 +242,70 @@ mod tests {
                 semaphore_address
             );
 
-            // Verify the argument types
+            // Verify the argument types; the format of the line is eg.
+            // "Arguments: size@%reg size@%reg", but we cannot know which
+            // registers the compiler has chosen to use for arguments passing,
+            // so we have to instead split out the two arguments we expect to
+            // find for this probe specifically, check their sizes, and match
+            // the register strings ("%reg") against a list of possible values.
             let line = lines.next().expect("Expected a line containing arguments");
             let line = line.trim();
-            let arguments_line = if cfg!(target_arch = "x86_64") {
-                "Arguments: 1@%dil 8@%rsi"
-            } else if cfg!(target_arch = "aarch64") {
-                "Arguments: 1@x0 8@x1"
-            } else {
-                unreachable!("Unsupported Linux target architecture")
-            };
+            let (args_header, args_rest) = line
+                .split_once(" ")
+                .expect("Expected arguments line to have one space");
+            let (first_arg, second_arg) = args_rest
+                .split_once(" ")
+                .expect("Expected arguments line to have two spaces");
+            let (first_arg_size, first_arg_register) = first_arg
+                .split_once("@")
+                .expect("Expected first argument to have @ sign");
+            let (second_arg_size, second_arg_register) = second_arg
+                .split_once("@")
+                .expect("Expected first argument to have @ sign");
+            assert_eq!(
+                first_arg_size, "1",
+                "First argument size appears incorrect: {}",
+                first_arg_size
+            );
             assert_eq!(
-                line, arguments_line,
+                second_arg_size, "8",
+                "Second argument size appears incorrect: {}",
+                second_arg_size
+            );
+            assert_eq!(
+                args_header, "Arguments:",
                 "Arguments line appears incorrect: {}",
                 line
             );
+            if cfg!(target_arch = "x86_64") {
+                assert!(
+                    ["%ah", "%al", "%bh", "%bl", "%ch", "%cl", "%dh", "%dl", "%dil", "%sil"]
+                        .contains(&first_arg_register),
+                    "First argument register appears incorrect: {}",
+                    first_arg_register
+                );
+                assert!(
+                    ["%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi"].contains(&second_arg_register),
+                    "Second argument register appears incorrect: {}",
+                    second_arg_register
+                );
+            } else if cfg!(target_arch = "aarch64") {
+                fn is_valid_register(reg: &str) -> bool {
+                    reg.starts_with('x') && str::parse::<u8>(&reg[1..]).is_ok_and(|val| val <= 30)
+                }
+                assert!(
+                    is_valid_register(first_arg_register),
+                    "First argument register appears incorrect: {}",
+                    first_arg_register
+                );
+                assert!(
+                    is_valid_register(second_arg_register),
+                    "Second argument register appears incorrect: {}",
+                    second_arg_register
+                );
+            } else {
+                unreachable!("Unsupported Linux target architecture")
+            }
 
             thr.join().expect("Failed to join test runner thread");
         }
diff --git a/usdt-impl/src/common.rs b/usdt-impl/src/common.rs
index e56b8d47..7cdb488a 100644
--- a/usdt-impl/src/common.rs
+++ b/usdt-impl/src/common.rs
@@ -101,6 +101,26 @@ fn shared_slice_elem_type(reference: &syn::TypeReference) -> Option<&syn::Type>
     }
 }
 
+/// Returns true if the DataType requires the special "reg_byte" register class
+/// to be used for the `asm!` macro arguments passing. This only happens for
+/// STAPSDT probes on x86.
+#[cfg(usdt_backend_stapsdt)]
+fn use_reg_byte(typ: &DataType) -> bool {
+    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
+    {
+        use dtrace_parser::{BitWidth, Integer};
+        matches!(
+            typ,
+            DataType::Native(dtrace_parser::DataType::Integer(Integer {
+                sign: _,
+                width: BitWidth::Bit8
+            }))
+        )
+    }
+    #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
+    false
+}
+
 // Return code to destructure a probe arguments into identifiers, and to pass those to ASM
 // registers.
 pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) {
@@ -135,8 +155,28 @@ pub fn construct_probe_args(types: &[DataType]) -> (TokenStream, TokenStream) {
                 let #arg = #value;
             };
             // Here, we convert the argument to store it within a register.
+            #[cfg(usdt_backend_stapsdt)]
+            let register_arg = {
+                // In SystemTap probes, the arguments can be passed freely in
+                // any registers without regard to standard function call ABIs.
+                // We thus do not need the register names in the STAPSDT
+                // backend. Intead, we use the "name = in(reg) arg" pattern to
+                // bind each argument into a local name (we shadow the original
+                // argument name here): this name can then be used by the
+                // `asm!` macro to refer to the argument "by register"; the
+                // compiler will fill in the actual register name at each
+                // reference point. This does away with a need for the compiler
+                // to generate code moving arugments around in registers before
+                // a probe site.
+                let _ = reg;
+                if use_reg_byte(typ) {
+                    quote! { #arg = in(reg_byte) (#arg #at_use) }
+                } else {
+                    quote! { #arg = in(reg) (#arg #at_use) }
+                }
+            };
+            #[cfg(not(usdt_backend_stapsdt))]
             let register_arg = quote! { in(#reg) (#arg #at_use) };
-
             (destructured_arg, register_arg)
         })
         .unzip();
@@ -164,7 +204,7 @@ pub fn call_argument_closure(types: &[DataType]) -> TokenStream {
 // Convert a supported data type to 1. a type to store for the duration of the
 // probe invocation and 2. a transformation for compatibility with an asm
 // register.
-fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) {
+pub(crate) fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenStream) {
     match typ {
         DataType::Serializable(_) => (
             // Convert the input to JSON. This is a fallible operation, however, so we wrap the
@@ -189,10 +229,15 @@ fn asm_type_convert(typ: &DataType, input: TokenStream) -> (TokenStream, TokenSt
         ),
         DataType::Native(_) => {
             let ty = typ.to_rust_type();
-            (
-                quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) },
-                quote! {},
-            )
+            #[cfg(not(usdt_backend_stapsdt))]
+            let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input) as usize) };
+            // For STAPSDT probes, we cannot "widen" the data type at the
+            // interface, as we've left the register choice up to the
+            // compiler and the compiler doesn't like mismatched register
+            // classes and types for single-byte values (reg_byte vs usize).
+            #[cfg(usdt_backend_stapsdt)]
+            let value = quote! { (*<_ as ::std::borrow::Borrow<#ty>>::borrow(&#input)) };
+            (value, quote! {})
         }
         DataType::UniqueId => (quote! { #input.as_u64() as usize }, quote! {}),
     }
@@ -349,11 +394,18 @@ mod tests {
         #[cfg(target_arch = "aarch64")]
         let registers = ["x0", "x1"];
         let (args, regs) = construct_probe_args(types);
+        #[cfg(not(usdt_backend_stapsdt))]
         let expected = quote! {
             let args = ($args_lambda)();
             let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0) as usize);
             let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat();
         };
+        #[cfg(usdt_backend_stapsdt)]
+        let expected = quote! {
+            let args = ($args_lambda)();
+            let arg_0 = (*<_ as ::std::borrow::Borrow<*const u8>>::borrow(&args.0));
+            let arg_1 = [(args.1.as_ref() as &str).as_bytes(), &[0_u8]].concat();
+        };
         assert_eq!(args.to_string(), expected.to_string());
 
         for (i, (expected, actual)) in registers
@@ -361,8 +413,15 @@ mod tests {
             .zip(regs.to_string().split(','))
             .enumerate()
         {
+            // We don't need the register names for STAPSDT probes.
+            #[cfg(usdt_backend_stapsdt)]
+            let _ = expected;
+
             let reg = actual.replace(' ', "");
+            #[cfg(not(usdt_backend_stapsdt))]
             let expected = format!("in(\"{}\")(arg_{}", expected, i);
+            #[cfg(usdt_backend_stapsdt)]
+            let expected = format!("arg_{i}=in(reg)(arg_{i}");
             assert!(
                 reg.starts_with(&expected),
                 "reg: {}; expected {}",
@@ -382,10 +441,12 @@ mod tests {
             })),
             TokenStream::from_str("foo").unwrap(),
         );
-        assert_eq!(
-            out.to_string(),
-            quote! {(*<_ as ::std::borrow::Borrow<u8>>::borrow(&foo) as usize)}.to_string()
-        );
+        #[cfg(usdt_backend_stapsdt)]
+        let out_expected = quote! {(*<_ as ::std::borrow::Borrow<u8>>::borrow(&foo))}.to_string();
+        #[cfg(not(usdt_backend_stapsdt))]
+        let out_expected =
+            quote! {(*<_ as ::std::borrow::Borrow<u8>>::borrow(&foo) as usize)}.to_string();
+        assert_eq!(out.to_string(), out_expected);
         assert_eq!(post.to_string(), quote! {}.to_string());
 
         let (out, post) = asm_type_convert(
diff --git a/usdt-impl/src/stapsdt.rs b/usdt-impl/src/stapsdt.rs
index a04aaff7..ecadb137 100644
--- a/usdt-impl/src/stapsdt.rs
+++ b/usdt-impl/src/stapsdt.rs
@@ -22,6 +22,7 @@
 #[path = "stapsdt/args.rs"]
 mod args;
 
+use crate::common::construct_probe_args;
 use crate::{common, DataType};
 use crate::{Probe, Provider};
 use args::format_argument;
@@ -192,7 +193,7 @@ fn compile_probe(
     probe: &Probe,
     config: &crate::CompileProvidersConfig,
 ) -> TokenStream {
-    let (unpacked_args, in_regs) = common::construct_probe_args(&probe.types);
+    let (unpacked_args, in_regs) = construct_probe_args(&probe.types);
     let probe_rec = emit_probe_record(&provider.name, &probe.name, Some(&probe.types));
     let type_check_fn = common::construct_type_check(
         &provider.name,
@@ -202,6 +203,33 @@ fn compile_probe(
     );
 
     let sema_name = format_ident!("__usdt_sema_{}_{}", provider.name, probe.name);
+    let options = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "x86") {
+        // STAPSDT probes contain an "arguments" string which contains the
+        // size, type, and location of each argument. This string is expected
+        // to be in the AT&T syntax: we change the syntax for x86 only, as only
+        // there the syntax effects register naming. The rest of our inline
+        // assembly here is the same in both AT&T and Intel syntax, so we can
+        // freely change the syntax without changing the generating code.
+        // The arguments string on x86 looks like this:
+        // * "2@%ax -4@%edi 8f@%rsi"
+        // and in our inline assembly it is as follows:
+        // * "2@{arg0} -4@{arg1} 8f@{arg2}"
+        // The argument size and type is explicitly named on the left side of
+        // the "@" sign, but the register is given by argument name instead of
+        // explicitly naming eg. "%ax". This gives the compiler the freedom to
+        // choose for itself where it wants to place the arguments. The only
+        // thing we need to make sure of is that the argument register strings
+        // are in the AT&T syntax: in Intel syntax the the "%" character would
+        // be missing from the register names.
+        // Note that we could manually fill in the "%" character and still use
+        // Intel syntax, but that will break down if Rust's inline assembly
+        // ever gets memory operands. Memory operands in the syntax look like:
+        // * "8@0x18(%rsp)"
+        // for "8-byte unsigned integer at stack + 0x18".
+        quote! { options(att_syntax, nomem, nostack, preserves_flags) }
+    } else {
+        quote! { options(nomem, nostack, preserves_flags) }
+    };
     let impl_block = quote! {
         unsafe extern "C" {
             // Note: C libraries use a struct containing an unsigned short
@@ -226,7 +254,7 @@ fn compile_probe(
                     "990:   nop",
                     #probe_rec,
                     #in_regs
-                    options(nomem, nostack, preserves_flags)
+                    #options
                 );
             }
         }
diff --git a/usdt-impl/src/stapsdt/args.rs b/usdt-impl/src/stapsdt/args.rs
index 58e863a2..7e878077 100644
--- a/usdt-impl/src/stapsdt/args.rs
+++ b/usdt-impl/src/stapsdt/args.rs
@@ -17,11 +17,12 @@
 use crate::DataType;
 use dtrace_parser::{BitWidth, DataType as NativeDataType, Integer, Sign};
 
-/// Convert an Integer type and a register index into a GNU Assembler operation
-/// that reads the integer's value from the correct register. Effectively this
-/// means generating a string like `%REG` where `REG` is the register that the
-/// data is located in.
-fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str {
+/// Convert an Integer type and an argument index into a GNU Assembler
+/// operand that reads the integer's value from the argument by name. The
+/// exact register choice for argument passing is left up to the compiler,
+/// meaning that this function generates a string like "{arg_N}" with possible
+/// register type/size suffix after the `arg_N`, separated by a colon.
+fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> String {
     // See common.rs for note on argument passing and maximum supported
     // argument count.
     assert!(
@@ -29,71 +30,22 @@ fn integer_to_asm_op(integer: &Integer, reg_index: u8) -> &'static str {
         "Up to 6 probe arguments are currently supported"
     );
     if cfg!(target_arch = "x86_64") {
-        match (integer.width, reg_index) {
-            (BitWidth::Bit8, 0) => "%dil",
-            (BitWidth::Bit16, 0) => "%di",
-            (BitWidth::Bit32, 0) => "%edi",
-            (BitWidth::Bit64, 0) => "%rdi",
-            (BitWidth::Bit8, 1) => "%sil",
-            (BitWidth::Bit16, 1) => "%si",
-            (BitWidth::Bit32, 1) => "%esi",
-            (BitWidth::Bit64, 1) => "%rsi",
-            (BitWidth::Bit8, 2) => "%dl",
-            (BitWidth::Bit16, 2) => "%dx",
-            (BitWidth::Bit32, 2) => "%edx",
-            (BitWidth::Bit64, 2) => "%rdx",
-            (BitWidth::Bit8, 3) => "%cl",
-            (BitWidth::Bit16, 3) => "%cx",
-            (BitWidth::Bit32, 3) => "%ecx",
-            (BitWidth::Bit64, 3) => "%rcx",
-            (BitWidth::Bit8, 4) => "%r8b",
-            (BitWidth::Bit16, 4) => "%r8w",
-            (BitWidth::Bit32, 4) => "%r8d",
-            (BitWidth::Bit64, 4) => "%r8",
-            (BitWidth::Bit8, 5) => "%r9b",
-            (BitWidth::Bit16, 5) => "%r9w",
-            (BitWidth::Bit32, 5) => "%r9d",
-            (BitWidth::Bit64, 5) => "%r9",
+        match integer.width {
+            BitWidth::Bit8 => format!("{{arg_{reg_index}}}"),
+            BitWidth::Bit16 => format!("{{arg_{reg_index}:x}}"),
+            BitWidth::Bit32 => format!("{{arg_{reg_index}:e}}"),
+            BitWidth::Bit64 => format!("{{arg_{reg_index}:r}}"),
             #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 0) => "%edi",
+            BitWidth::Pointer => format!("{{arg_{reg_index}:e}}"),
             #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 0) => "%rdi",
-            #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 1) => "%esi",
-            #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 1) => "%rsi",
-            #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 2) => "%edx",
-            #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 2) => "%rdx",
-            #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 3) => "%ecx",
-            #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 3) => "%rcx",
-            #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 4) => "%e8",
-            #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 4) => "%r8",
-            #[cfg(target_pointer_width = "32")]
-            (BitWidth::Pointer, 5) => "%e9",
-            #[cfg(target_pointer_width = "64")]
-            (BitWidth::Pointer, 5) => "%r9",
+            BitWidth::Pointer => format!("{{arg_{reg_index}:r}}"),
             #[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
-            (BitWidth::Pointer, _) => compile_error!("Unsupported pointer width"),
-            _ => unreachable!(),
+            BitWidth::Pointer => compile_error!("Unsupported pointer width"),
         }
     } else if cfg!(target_arch = "aarch64") {
         // GNU Assembly syntax for SystemTap only uses the extended register
         // for some reason.
-        match reg_index {
-            0 => "x0",
-            1 => "x1",
-            2 => "x2",
-            3 => "x3",
-            4 => "x4",
-            5 => "x5",
-            _ => unreachable!(),
-        }
+        format!("{{arg_{reg_index}:x}}")
     } else {
         unreachable!("Unsupported Linux target architecture")
     }
@@ -138,8 +90,8 @@ const UNIQUE_ID: Integer = Integer {
     width: BitWidth::Bit64,
 };
 
-/// Convert a type and register index to its GNU Assembler operation as a
-/// String.
+/// Convert a NativeDataType and register index to its GNU Assembler operand
+/// as a String.
 fn native_data_type_to_asm_op(typ: &NativeDataType, reg_index: u8) -> String {
     match typ {
         NativeDataType::Integer(int) => integer_to_asm_op(int, reg_index).into(),
@@ -160,7 +112,7 @@ fn native_data_type_to_arg_size(typ: &NativeDataType) -> &'static str {
     }
 }
 
-/// Convert a DataType and register index to its GNU Assembler operation as a
+/// Convert a DataType and register index to a GNU Assembler operand as a
 /// String.
 fn data_type_to_asm_op(typ: &DataType, reg_index: u8) -> String {
     match typ {
@@ -207,3 +159,105 @@ pub(crate) fn format_argument((reg_index, typ): (usize, &DataType)) -> String {
         data_type_to_asm_op(typ, u8::try_from(reg_index).unwrap())
     )
 }
+
+#[cfg(test)]
+mod test {
+    use dtrace_parser::{BitWidth, Integer};
+
+    use crate::{
+        internal::args::{format_argument, integer_to_asm_op},
+        DataType,
+    };
+
+    fn int(width: BitWidth) -> Integer {
+        Integer {
+            sign: dtrace_parser::Sign::Signed,
+            width,
+        }
+    }
+
+    fn uint(width: BitWidth) -> Integer {
+        Integer {
+            sign: dtrace_parser::Sign::Unsigned,
+            width,
+        }
+    }
+
+    #[test]
+    fn integer_to_asm_op_tests() {
+        if cfg!(target_arch = "x86_64") {
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:e}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:r}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:r}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:e}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:r}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:r}");
+        } else if cfg!(target_arch = "aarch64") {
+            // GNU Assembly syntax for SystemTap only uses the extended register
+            // for some reason.
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 0), "{arg_0:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit8), 1), "{arg_1:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit16), 1), "{arg_1:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit32), 1), "{arg_1:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Bit64), 1), "{arg_1:x}");
+            assert_eq!(integer_to_asm_op(&uint(BitWidth::Pointer), 1), "{arg_1:x}");
+        } else {
+            unreachable!("Unsupported Linux target architecture")
+        }
+    }
+
+    #[test]
+    fn format_argument_tests() {
+        if cfg!(target_arch = "x86_64") {
+            assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:r}");
+            assert_eq!(
+                format_argument((4, &DataType::Native(dtrace_parser::DataType::String))),
+                "8@{arg_4:r}"
+            );
+            assert_eq!(
+                format_argument((
+                    4,
+                    &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32)))
+                )),
+                "4@{arg_4:e}"
+            );
+            assert_eq!(
+                format_argument((
+                    3,
+                    &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16)))
+                )),
+                "-2@{arg_3:x}"
+            );
+        } else if cfg!(target_arch = "aarch64") {
+            assert_eq!(format_argument((0, &DataType::UniqueId)), "8@{arg_0:x}");
+            assert_eq!(
+                format_argument((4, &DataType::Native(dtrace_parser::DataType::String))),
+                "8@{arg_4:x}"
+            );
+            assert_eq!(
+                format_argument((
+                    4,
+                    &DataType::Native(dtrace_parser::DataType::Integer(uint(BitWidth::Bit32)))
+                )),
+                "4@{arg_4:x}"
+            );
+            assert_eq!(
+                format_argument((
+                    3,
+                    &DataType::Native(dtrace_parser::DataType::Integer(int(BitWidth::Bit16)))
+                )),
+                "-2@{arg_3:x}"
+            );
+        } else {
+            unreachable!("Unsupported Linux target architecture")
+        }
+    }
+}