From 43506591e8996fda9617c26d489497c50379abea Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 5 Jul 2025 10:32:16 +0200 Subject: [PATCH 01/24] miri-script: set msrv so clippy doesn't suggest too-new features --- src/tools/miri/miri-script/Cargo.toml | 1 + src/tools/miri/miri-script/src/commands.rs | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tools/miri/miri-script/Cargo.toml b/src/tools/miri/miri-script/Cargo.toml index 462a9cc62bff0..9240788d6bce1 100644 --- a/src/tools/miri/miri-script/Cargo.toml +++ b/src/tools/miri/miri-script/Cargo.toml @@ -7,6 +7,7 @@ repository = "https://github.com/rust-lang/miri" version = "0.1.0" default-run = "miri-script" edition = "2024" +rust-version = "1.85" [workspace] # We make this a workspace root so that cargo does not go looking in ../Cargo.toml for the workspace root. diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs index e948beef00447..e6ebdf54e3851 100644 --- a/src/tools/miri/miri-script/src/commands.rs +++ b/src/tools/miri/miri-script/src/commands.rs @@ -702,7 +702,6 @@ impl Command { let mut early_flags = Vec::::new(); // In `dep` mode, the target is already passed via `MIRI_TEST_TARGET` - #[expect(clippy::collapsible_if)] // we need to wait until this is stable if !dep { if let Some(target) = &target { early_flags.push("--target".into()); @@ -735,7 +734,6 @@ impl Command { // Add Miri flags let mut cmd = cmd.args(&miri_flags).args(&early_flags).args(&flags); // For `--dep` we also need to set the target in the env var. - #[expect(clippy::collapsible_if)] // we need to wait until this is stable if dep { if let Some(target) = &target { cmd = cmd.env("MIRI_TEST_TARGET", target); From d87984e12934cc2f1cf64461f2416a82d76383fe Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 5 Jul 2025 05:06:18 +0100 Subject: [PATCH 02/24] shims::fs adding more fields to FileMetadata addressing, partially at least, FIXME comment and targetting unixes, adding device, user and group ids. --- src/tools/miri/src/shims/unix/fs.rs | 31 +++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs index 0f7d453b296c9..073072a883d61 100644 --- a/src/tools/miri/src/shims/unix/fs.rs +++ b/src/tools/miri/src/shims/unix/fs.rs @@ -128,16 +128,19 @@ trait EvalContextExtPrivate<'tcx>: crate::MiriInterpCxExt<'tcx> { let (created_sec, created_nsec) = metadata.created.unwrap_or((0, 0)); let (modified_sec, modified_nsec) = metadata.modified.unwrap_or((0, 0)); let mode = metadata.mode.to_uint(this.libc_ty_layout("mode_t").size)?; + let dev = metadata.dev; + let uid = metadata.uid; + let gid = metadata.gid; let buf = this.deref_pointer_as(buf_op, this.libc_ty_layout("stat"))?; this.write_int_fields_named( &[ - ("st_dev", 0), + ("st_dev", dev.into()), ("st_mode", mode.try_into().unwrap()), ("st_nlink", 0), ("st_ino", 0), - ("st_uid", 0), - ("st_gid", 0), + ("st_uid", uid.into()), + ("st_gid", gid.into()), ("st_rdev", 0), ("st_atime", access_sec.into()), ("st_mtime", modified_sec.into()), @@ -1544,6 +1547,9 @@ struct FileMetadata { created: Option<(u64, u32)>, accessed: Option<(u64, u32)>, modified: Option<(u64, u32)>, + dev: u64, + uid: u32, + gid: u32, } impl FileMetadata { @@ -1575,6 +1581,9 @@ impl FileMetadata { ecx: &mut MiriInterpCx<'tcx>, metadata: Result, ) -> InterpResult<'tcx, Result> { + #[cfg(unix)] + use std::os::unix::fs::MetadataExt; + let metadata = match metadata { Ok(metadata) => metadata, Err(e) => { @@ -1601,6 +1610,20 @@ impl FileMetadata { let modified = extract_sec_and_nsec(metadata.modified())?; // FIXME: Provide more fields using platform specific methods. - interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified })) + + cfg_select! { + unix => { + let dev = metadata.dev(); + let uid = metadata.uid(); + let gid = metadata.gid(); + } + _ => { + let dev = 0; + let uid = 0; + let gid = 0; + } + } + + interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified, dev, uid, gid })) } } From ef0490c8685ad6ef76e2777934571fc913abd1a8 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 5 Jul 2025 11:40:37 +0200 Subject: [PATCH 03/24] minor cleanup --- src/tools/miri/src/shims/unix/fs.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs index 073072a883d61..0f2878ad26c85 100644 --- a/src/tools/miri/src/shims/unix/fs.rs +++ b/src/tools/miri/src/shims/unix/fs.rs @@ -128,19 +128,16 @@ trait EvalContextExtPrivate<'tcx>: crate::MiriInterpCxExt<'tcx> { let (created_sec, created_nsec) = metadata.created.unwrap_or((0, 0)); let (modified_sec, modified_nsec) = metadata.modified.unwrap_or((0, 0)); let mode = metadata.mode.to_uint(this.libc_ty_layout("mode_t").size)?; - let dev = metadata.dev; - let uid = metadata.uid; - let gid = metadata.gid; let buf = this.deref_pointer_as(buf_op, this.libc_ty_layout("stat"))?; this.write_int_fields_named( &[ - ("st_dev", dev.into()), + ("st_dev", metadata.dev.into()), ("st_mode", mode.try_into().unwrap()), ("st_nlink", 0), ("st_ino", 0), - ("st_uid", uid.into()), - ("st_gid", gid.into()), + ("st_uid", metadata.uid.into()), + ("st_gid", metadata.gid.into()), ("st_rdev", 0), ("st_atime", access_sec.into()), ("st_mtime", modified_sec.into()), @@ -1581,9 +1578,6 @@ impl FileMetadata { ecx: &mut MiriInterpCx<'tcx>, metadata: Result, ) -> InterpResult<'tcx, Result> { - #[cfg(unix)] - use std::os::unix::fs::MetadataExt; - let metadata = match metadata { Ok(metadata) => metadata, Err(e) => { @@ -1613,6 +1607,7 @@ impl FileMetadata { cfg_select! { unix => { + use std::os::unix::fs::MetadataExt; let dev = metadata.dev(); let uid = metadata.uid(); let gid = metadata.gid(); From e40f8b26ada6f517235fd5813231adbb79e58ced Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 6 Jul 2025 16:51:14 +0200 Subject: [PATCH 04/24] =?UTF-8?q?rename=20panic=5Fpaylods=20=E2=86=92=20un?= =?UTF-8?q?wind=5Fpayloads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/miri/src/concurrency/thread.rs | 10 +++++----- src/tools/miri/src/shims/panic.rs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs index 56c197948546c..ec65ea685e1fe 100644 --- a/src/tools/miri/src/concurrency/thread.rs +++ b/src/tools/miri/src/concurrency/thread.rs @@ -186,15 +186,15 @@ pub struct Thread<'tcx> { /// The join status. join_status: ThreadJoinStatus, - /// Stack of active panic payloads for the current thread. Used for storing - /// the argument of the call to `miri_start_unwind` (the panic payload) when unwinding. + /// Stack of active unwind payloads for the current thread. Used for storing + /// the argument of the call to `miri_start_unwind` (the payload) when unwinding. /// This is pointer-sized, and matches the `Payload` type in `src/libpanic_unwind/miri.rs`. /// /// In real unwinding, the payload gets passed as an argument to the landing pad, /// which then forwards it to 'Resume'. However this argument is implicit in MIR, /// so we have to store it out-of-band. When there are multiple active unwinds, /// the innermost one is always caught first, so we can store them as a stack. - pub(crate) panic_payloads: Vec>, + pub(crate) unwind_payloads: Vec>, /// Last OS error location in memory. It is a 32-bit integer. pub(crate) last_error: Option>, @@ -282,7 +282,7 @@ impl<'tcx> Thread<'tcx> { stack: Vec::new(), top_user_relevant_frame: None, join_status: ThreadJoinStatus::Joinable, - panic_payloads: Vec::new(), + unwind_payloads: Vec::new(), last_error: None, on_stack_empty, } @@ -292,7 +292,7 @@ impl<'tcx> Thread<'tcx> { impl VisitProvenance for Thread<'_> { fn visit_provenance(&self, visit: &mut VisitWith<'_>) { let Thread { - panic_payloads: panic_payload, + unwind_payloads: panic_payload, last_error, stack, top_user_relevant_frame: _, diff --git a/src/tools/miri/src/shims/panic.rs b/src/tools/miri/src/shims/panic.rs index a6bce8301491f..b3b7b6ae7187b 100644 --- a/src/tools/miri/src/shims/panic.rs +++ b/src/tools/miri/src/shims/panic.rs @@ -51,7 +51,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let payload = this.read_immediate(payload)?; let thread = this.active_thread_mut(); - thread.panic_payloads.push(payload); + thread.unwind_payloads.push(payload); interp_ok(()) } @@ -132,7 +132,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // The Thread's `panic_payload` holds what was passed to `miri_start_unwind`. // This is exactly the second argument we need to pass to `catch_fn`. - let payload = this.active_thread_mut().panic_payloads.pop().unwrap(); + let payload = this.active_thread_mut().unwind_payloads.pop().unwrap(); // Push the `catch_fn` stackframe. let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?; From 59af8f36e31165dc2fa5968941a2edad3e1c0025 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Mon, 7 Jul 2025 04:58:18 +0000 Subject: [PATCH 05/24] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 0130cf13a45f7..7ca7acb3fd709 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -733b47ea4b1b86216f14ef56e49440c33933f230 +ca98d4d4b3114116203699c2734805547df86f9a From ae3b9765f4510362b3e7ce7e1a62841b2cf5887d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 7 Jul 2025 08:18:32 +0200 Subject: [PATCH 06/24] split unwinding logic from panic logic --- src/tools/miri/src/lib.rs | 3 +- src/tools/miri/src/shims/mod.rs | 1 + src/tools/miri/src/shims/panic.rs | 152 +-------------------------- src/tools/miri/src/shims/unwind.rs | 160 +++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 152 deletions(-) create mode 100644 src/tools/miri/src/shims/unwind.rs diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index c86e33e518591..ca99d69b32d0a 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -154,9 +154,10 @@ pub use crate::shims::env::{EnvVars, EvalContextExt as _}; pub use crate::shims::foreign_items::{DynSym, EvalContextExt as _}; pub use crate::shims::io_error::{EvalContextExt as _, IoError, LibcError}; pub use crate::shims::os_str::EvalContextExt as _; -pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _}; +pub use crate::shims::panic::EvalContextExt as _; pub use crate::shims::time::EvalContextExt as _; pub use crate::shims::tls::TlsData; +pub use crate::shims::unwind::{CatchUnwindData, EvalContextExt as _}; /// Insert rustc arguments at the beginning of the argument list that Miri wants to be /// set per default, for maximal validation power. diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index f09fc546b3eba..60c0e289292bd 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -19,6 +19,7 @@ pub mod os_str; pub mod panic; pub mod time; pub mod tls; +pub mod unwind; pub use self::files::FdTable; //#[cfg(target_os = "linux")] diff --git a/src/tools/miri/src/shims/panic.rs b/src/tools/miri/src/shims/panic.rs index b3b7b6ae7187b..5fcb783d68862 100644 --- a/src/tools/miri/src/shims/panic.rs +++ b/src/tools/miri/src/shims/panic.rs @@ -1,162 +1,12 @@ -//! Panic runtime for Miri. -//! -//! The core pieces of the runtime are: -//! - An implementation of `__rust_maybe_catch_panic` that pushes the invoked stack frame with -//! some extra metadata derived from the panic-catching arguments of `__rust_maybe_catch_panic`. -//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the -//! target-native panic runtime. (This lives in the rustc repo.) -//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and then -//! immediately returns, but on the *unwind* edge (not the normal return edge), thus initiating unwinding. -//! - A hook executed each time a frame is popped, such that if the frame pushed by `__rust_maybe_catch_panic` -//! gets popped *during unwinding*, we take the panic payload and store it according to the extra -//! metadata we remembered when pushing said frame. +//! Helper functions for causing panics. use rustc_abi::ExternAbi; use rustc_middle::{mir, ty}; -use rustc_target::spec::PanicStrategy; -use self::helpers::check_intrinsic_arg_count; use crate::*; -/// Holds all of the relevant data for when unwinding hits a `try` frame. -#[derive(Debug)] -pub struct CatchUnwindData<'tcx> { - /// The `catch_fn` callback to call in case of a panic. - catch_fn: Pointer, - /// The `data` argument for that callback. - data: ImmTy<'tcx>, - /// The return place from the original call to `try`. - dest: MPlaceTy<'tcx>, - /// The return block from the original call to `try`. - ret: Option, -} - -impl VisitProvenance for CatchUnwindData<'_> { - fn visit_provenance(&self, visit: &mut VisitWith<'_>) { - let CatchUnwindData { catch_fn, data, dest, ret: _ } = self; - catch_fn.visit_provenance(visit); - data.visit_provenance(visit); - dest.visit_provenance(visit); - } -} - impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { - /// Handles the special `miri_start_unwind` intrinsic, which is called - /// by libpanic_unwind to delegate the actual unwinding process to Miri. - fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - - trace!("miri_start_unwind: {:?}", this.frame().instance()); - - let payload = this.read_immediate(payload)?; - let thread = this.active_thread_mut(); - thread.unwind_payloads.push(payload); - - interp_ok(()) - } - - /// Handles the `catch_unwind` intrinsic. - fn handle_catch_unwind( - &mut self, - args: &[OpTy<'tcx>], - dest: &MPlaceTy<'tcx>, - ret: Option, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - - // Signature: - // fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32 - // Calls `try_fn` with `data` as argument. If that executes normally, returns 0. - // If that unwinds, calls `catch_fn` with the first argument being `data` and - // then second argument being a target-dependent `payload` (i.e. it is up to us to define - // what that is), and returns 1. - // The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to - // return a `Box`. - // In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply - // a pointer to `Box`. - - // Get all the arguments. - let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?; - let try_fn = this.read_pointer(try_fn)?; - let data = this.read_immediate(data)?; - let catch_fn = this.read_pointer(catch_fn)?; - - // Now we make a function call, and pass `data` as first and only argument. - let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?; - trace!("try_fn: {:?}", f_instance); - #[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is - this.call_function( - f_instance, - ExternAbi::Rust, - &[data.clone()], - None, - // Directly return to caller. - StackPopCleanup::Goto { ret, unwind: mir::UnwindAction::Continue }, - )?; - - // We ourselves will return `0`, eventually (will be overwritten if we catch a panic). - this.write_null(dest)?; - - // In unwind mode, we tag this frame with the extra data needed to catch unwinding. - // This lets `handle_stack_pop` (below) know that we should stop unwinding - // when we pop this frame. - if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind { - this.frame_mut().extra.catch_unwind = - Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret }); - } - - interp_ok(()) - } - - fn handle_stack_pop_unwind( - &mut self, - mut extra: FrameExtra<'tcx>, - unwinding: bool, - ) -> InterpResult<'tcx, ReturnAction> { - let this = self.eval_context_mut(); - trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding); - - // We only care about `catch_panic` if we're unwinding - if we're doing a normal - // return, then we don't need to do anything special. - if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) { - // We've just popped a frame that was pushed by `catch_unwind`, - // and we are unwinding, so we should catch that. - trace!( - "unwinding: found catch_panic frame during unwinding: {:?}", - this.frame().instance() - ); - - // We set the return value of `catch_unwind` to 1, since there was a panic. - this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?; - - // The Thread's `panic_payload` holds what was passed to `miri_start_unwind`. - // This is exactly the second argument we need to pass to `catch_fn`. - let payload = this.active_thread_mut().unwind_payloads.pop().unwrap(); - - // Push the `catch_fn` stackframe. - let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?; - trace!("catch_fn: {:?}", f_instance); - this.call_function( - f_instance, - ExternAbi::Rust, - &[catch_unwind.data, payload], - None, - // Directly return to caller of `catch_unwind`. - StackPopCleanup::Goto { - ret: catch_unwind.ret, - // `catch_fn` must not unwind. - unwind: mir::UnwindAction::Unreachable, - }, - )?; - - // We pushed a new stack frame, the engine should not do any jumping now! - interp_ok(ReturnAction::NoJump) - } else { - interp_ok(ReturnAction::Normal) - } - } - /// Start a panic in the interpreter with the given message as payload. fn start_panic(&mut self, msg: &str, unwind: mir::UnwindAction) -> InterpResult<'tcx> { let this = self.eval_context_mut(); diff --git a/src/tools/miri/src/shims/unwind.rs b/src/tools/miri/src/shims/unwind.rs new file mode 100644 index 0000000000000..7cbbbb7f4d715 --- /dev/null +++ b/src/tools/miri/src/shims/unwind.rs @@ -0,0 +1,160 @@ +//! Unwinding runtime for Miri. +//! +//! The core pieces of the runtime are: +//! - An implementation of `catch_unwind` that pushes the invoked stack frame with +//! some extra metadata derived from the panic-catching arguments of `catch_unwind`. +//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the +//! target-native panic runtime. (This lives in the rustc repo.) +//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and +//! then immediately returns, but on the *unwind* edge (not the normal return edge), thus +//! initiating unwinding. +//! - A hook executed each time a frame is popped, such that if the frame pushed by `catch_unwind` +//! gets popped *during unwinding*, we take the panic payload and store it according to the extra +//! metadata we remembered when pushing said frame. + +use rustc_abi::ExternAbi; +use rustc_middle::mir; +use rustc_target::spec::PanicStrategy; + +use self::helpers::check_intrinsic_arg_count; +use crate::*; + +/// Holds all of the relevant data for when unwinding hits a `try` frame. +#[derive(Debug)] +pub struct CatchUnwindData<'tcx> { + /// The `catch_fn` callback to call in case of a panic. + catch_fn: Pointer, + /// The `data` argument for that callback. + data: ImmTy<'tcx>, + /// The return place from the original call to `try`. + dest: MPlaceTy<'tcx>, + /// The return block from the original call to `try`. + ret: Option, +} + +impl VisitProvenance for CatchUnwindData<'_> { + fn visit_provenance(&self, visit: &mut VisitWith<'_>) { + let CatchUnwindData { catch_fn, data, dest, ret: _ } = self; + catch_fn.visit_provenance(visit); + data.visit_provenance(visit); + dest.visit_provenance(visit); + } +} + +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + /// Handles the special `miri_start_unwind` intrinsic, which is called + /// by libpanic_unwind to delegate the actual unwinding process to Miri. + fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + trace!("miri_start_unwind: {:?}", this.frame().instance()); + + let payload = this.read_immediate(payload)?; + let thread = this.active_thread_mut(); + thread.unwind_payloads.push(payload); + + interp_ok(()) + } + + /// Handles the `catch_unwind` intrinsic. + fn handle_catch_unwind( + &mut self, + args: &[OpTy<'tcx>], + dest: &MPlaceTy<'tcx>, + ret: Option, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + // Signature: + // fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32 + // Calls `try_fn` with `data` as argument. If that executes normally, returns 0. + // If that unwinds, calls `catch_fn` with the first argument being `data` and + // then second argument being a target-dependent `payload` (i.e. it is up to us to define + // what that is), and returns 1. + // The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to + // return a `Box`. + // In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply + // a pointer to `Box`. + + // Get all the arguments. + let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?; + let try_fn = this.read_pointer(try_fn)?; + let data = this.read_immediate(data)?; + let catch_fn = this.read_pointer(catch_fn)?; + + // Now we make a function call, and pass `data` as first and only argument. + let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?; + trace!("try_fn: {:?}", f_instance); + #[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is + this.call_function( + f_instance, + ExternAbi::Rust, + &[data.clone()], + None, + // Directly return to caller. + StackPopCleanup::Goto { ret, unwind: mir::UnwindAction::Continue }, + )?; + + // We ourselves will return `0`, eventually (will be overwritten if we catch a panic). + this.write_null(dest)?; + + // In unwind mode, we tag this frame with the extra data needed to catch unwinding. + // This lets `handle_stack_pop` (below) know that we should stop unwinding + // when we pop this frame. + if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind { + this.frame_mut().extra.catch_unwind = + Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret }); + } + + interp_ok(()) + } + + fn handle_stack_pop_unwind( + &mut self, + mut extra: FrameExtra<'tcx>, + unwinding: bool, + ) -> InterpResult<'tcx, ReturnAction> { + let this = self.eval_context_mut(); + trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding); + + // We only care about `catch_panic` if we're unwinding - if we're doing a normal + // return, then we don't need to do anything special. + if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) { + // We've just popped a frame that was pushed by `catch_unwind`, + // and we are unwinding, so we should catch that. + trace!( + "unwinding: found catch_panic frame during unwinding: {:?}", + this.frame().instance() + ); + + // We set the return value of `catch_unwind` to 1, since there was a panic. + this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?; + + // The Thread's `panic_payload` holds what was passed to `miri_start_unwind`. + // This is exactly the second argument we need to pass to `catch_fn`. + let payload = this.active_thread_mut().unwind_payloads.pop().unwrap(); + + // Push the `catch_fn` stackframe. + let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?; + trace!("catch_fn: {:?}", f_instance); + this.call_function( + f_instance, + ExternAbi::Rust, + &[catch_unwind.data, payload], + None, + // Directly return to caller of `catch_unwind`. + StackPopCleanup::Goto { + ret: catch_unwind.ret, + // `catch_fn` must not unwind. + unwind: mir::UnwindAction::Unreachable, + }, + )?; + + // We pushed a new stack frame, the engine should not do any jumping now! + interp_ok(ReturnAction::NoJump) + } else { + interp_ok(ReturnAction::Normal) + } + } +} From e296e6bdfed270453e5423e68cb9f3b7ca49fec3 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 7 Jul 2025 17:53:23 +0200 Subject: [PATCH 07/24] move our data structures into a central location --- .../src/borrow_tracker/stacked_borrows/mod.rs | 4 ++-- .../src/borrow_tracker/tree_borrows/mod.rs | 2 +- .../src/borrow_tracker/tree_borrows/tree.rs | 6 ++--- src/tools/miri/src/concurrency/data_race.rs | 4 ++-- src/tools/miri/src/concurrency/mod.rs | 1 - src/tools/miri/src/concurrency/weak_memory.rs | 2 +- .../dedup_range_map.rs} | 22 +++++++++---------- src/tools/miri/src/data_structures/mod.rs | 3 +++ .../{ => data_structures}/mono_hash_map.rs | 0 .../range_object_map.rs | 0 src/tools/miri/src/lib.rs | 7 +++--- 11 files changed, 26 insertions(+), 25 deletions(-) rename src/tools/miri/src/{range_map.rs => data_structures/dedup_range_map.rs} (95%) create mode 100644 src/tools/miri/src/data_structures/mod.rs rename src/tools/miri/src/{ => data_structures}/mono_hash_map.rs (100%) rename src/tools/miri/src/{concurrency => data_structures}/range_object_map.rs (100%) diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index b8bcacf7c9943..834a4b41f2245 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -30,7 +30,7 @@ pub type AllocState = Stacks; #[derive(Clone, Debug)] pub struct Stacks { // Even reading memory can have effects on the stack, so we need a `RefCell` here. - stacks: RangeMap, + stacks: DedupRangeMap, /// Stores past operations on this allocation history: AllocHistory, /// The set of tags that have been exposed inside this allocation. @@ -468,7 +468,7 @@ impl<'tcx> Stacks { let stack = Stack::new(item); Stacks { - stacks: RangeMap::new(size, stack), + stacks: DedupRangeMap::new(size, stack), history: AllocHistory::new(id, item, machine), exposed_tags: FxHashSet::default(), } diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index a0761cb07a1da..c157c69d7c843 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -314,7 +314,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let span = this.machine.current_span(); // Store initial permissions and their corresponding range. - let mut perms_map: RangeMap = RangeMap::new( + let mut perms_map: DedupRangeMap = DedupRangeMap::new( ptr_size, LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten ); diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs index 48e4a19e263fb..1f29bcfc2b035 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs @@ -247,7 +247,7 @@ pub struct Tree { /// `unwrap` any `perm.get(key)`. /// /// We do uphold the fact that `keys(perms)` is a subset of `keys(nodes)` - pub(super) rperms: RangeMap>, + pub(super) rperms: DedupRangeMap>, /// The index of the root node. pub(super) root: UniIndex, } @@ -609,7 +609,7 @@ impl Tree { IdempotentForeignAccess::None, ), ); - RangeMap::new(size, perms) + DedupRangeMap::new(size, perms) }; Self { root: root_idx, nodes, rperms, tag_mapping } } @@ -631,7 +631,7 @@ impl<'tcx> Tree { base_offset: Size, parent_tag: BorTag, new_tag: BorTag, - initial_perms: RangeMap, + initial_perms: DedupRangeMap, default_perm: Permission, protected: bool, span: Span, diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index b5e7e9d0ac0d8..38d76f5cf73b7 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -997,7 +997,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { #[derive(Debug, Clone)] pub struct VClockAlloc { /// Assigning each byte a MemoryCellClocks. - alloc_ranges: RefCell>, + alloc_ranges: RefCell>, } impl VisitProvenance for VClockAlloc { @@ -1045,7 +1045,7 @@ impl VClockAlloc { (VTimestamp::ZERO, global.thread_index(ThreadId::MAIN_THREAD)), }; VClockAlloc { - alloc_ranges: RefCell::new(RangeMap::new( + alloc_ranges: RefCell::new(DedupRangeMap::new( len, MemoryCellClocks::new(alloc_timestamp, alloc_index), )), diff --git a/src/tools/miri/src/concurrency/mod.rs b/src/tools/miri/src/concurrency/mod.rs index 49bcc0d30b506..c2ea8a00decd8 100644 --- a/src/tools/miri/src/concurrency/mod.rs +++ b/src/tools/miri/src/concurrency/mod.rs @@ -2,7 +2,6 @@ pub mod cpu_affinity; pub mod data_race; mod data_race_handler; pub mod init_once; -mod range_object_map; pub mod sync; pub mod thread; mod vector_clock; diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs index 95c010be2fd21..a752ef7e45480 100644 --- a/src/tools/miri/src/concurrency/weak_memory.rs +++ b/src/tools/miri/src/concurrency/weak_memory.rs @@ -90,9 +90,9 @@ use rustc_data_structures::fx::FxHashMap; use super::AllocDataRaceHandler; use super::data_race::{GlobalState as DataRaceState, ThreadClockSet}; -use super::range_object_map::{AccessType, RangeObjectMap}; use super::vector_clock::{VClock, VTimestamp, VectorIdx}; use crate::concurrency::GlobalDataRaceHandler; +use crate::data_structures::range_object_map::{AccessType, RangeObjectMap}; use crate::*; pub type AllocState = StoreBufferAlloc; diff --git a/src/tools/miri/src/range_map.rs b/src/tools/miri/src/data_structures/dedup_range_map.rs similarity index 95% rename from src/tools/miri/src/range_map.rs rename to src/tools/miri/src/data_structures/dedup_range_map.rs index 29a5a8537a41d..56e9883b1caaf 100644 --- a/src/tools/miri/src/range_map.rs +++ b/src/tools/miri/src/data_structures/dedup_range_map.rs @@ -17,18 +17,18 @@ struct Elem { data: T, } #[derive(Clone, Debug)] -pub struct RangeMap { +pub struct DedupRangeMap { v: Vec>, } -impl RangeMap { +impl DedupRangeMap { /// Creates a new `RangeMap` for the given size, and with the given initial value used for /// the entire range. #[inline(always)] - pub fn new(size: Size, init: T) -> RangeMap { + pub fn new(size: Size, init: T) -> DedupRangeMap { let size = size.bytes(); let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() }; - RangeMap { v } + DedupRangeMap { v } } pub fn size(&self) -> Size { @@ -246,7 +246,7 @@ mod tests { use super::*; /// Query the map at every offset in the range and collect the results. - fn to_vec(map: &RangeMap, offset: u64, len: u64) -> Vec { + fn to_vec(map: &DedupRangeMap, offset: u64, len: u64) -> Vec { (offset..offset + len) .map(|i| { map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap() @@ -256,7 +256,7 @@ mod tests { #[test] fn basic_insert() { - let mut map = RangeMap::::new(Size::from_bytes(20), -1); + let mut map = DedupRangeMap::::new(Size::from_bytes(20), -1); // Insert. for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) { *x = 42; @@ -278,7 +278,7 @@ mod tests { #[test] fn gaps() { - let mut map = RangeMap::::new(Size::from_bytes(20), -1); + let mut map = DedupRangeMap::::new(Size::from_bytes(20), -1); for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) { *x = 42; } @@ -319,26 +319,26 @@ mod tests { #[test] #[should_panic] fn out_of_range_iter_mut() { - let mut map = RangeMap::::new(Size::from_bytes(20), -1); + let mut map = DedupRangeMap::::new(Size::from_bytes(20), -1); let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11)); } #[test] #[should_panic] fn out_of_range_iter() { - let map = RangeMap::::new(Size::from_bytes(20), -1); + let map = DedupRangeMap::::new(Size::from_bytes(20), -1); let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11)); } #[test] fn empty_map_iter() { - let map = RangeMap::::new(Size::from_bytes(0), -1); + let map = DedupRangeMap::::new(Size::from_bytes(0), -1); let _ = map.iter(Size::from_bytes(0), Size::from_bytes(0)); } #[test] fn empty_map_iter_mut() { - let mut map = RangeMap::::new(Size::from_bytes(0), -1); + let mut map = DedupRangeMap::::new(Size::from_bytes(0), -1); let _ = map.iter_mut(Size::from_bytes(0), Size::from_bytes(0)); } } diff --git a/src/tools/miri/src/data_structures/mod.rs b/src/tools/miri/src/data_structures/mod.rs new file mode 100644 index 0000000000000..d4468bc57eae9 --- /dev/null +++ b/src/tools/miri/src/data_structures/mod.rs @@ -0,0 +1,3 @@ +pub mod dedup_range_map; +pub mod mono_hash_map; +pub mod range_object_map; diff --git a/src/tools/miri/src/mono_hash_map.rs b/src/tools/miri/src/data_structures/mono_hash_map.rs similarity index 100% rename from src/tools/miri/src/mono_hash_map.rs rename to src/tools/miri/src/data_structures/mono_hash_map.rs diff --git a/src/tools/miri/src/concurrency/range_object_map.rs b/src/tools/miri/src/data_structures/range_object_map.rs similarity index 100% rename from src/tools/miri/src/concurrency/range_object_map.rs rename to src/tools/miri/src/data_structures/range_object_map.rs diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index ca99d69b32d0a..642b7ee984019 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -75,16 +75,15 @@ mod alloc_addresses; mod borrow_tracker; mod clock; mod concurrency; +mod data_structures; mod diagnostics; mod eval; mod helpers; mod intrinsics; mod machine; mod math; -mod mono_hash_map; mod operator; mod provenance_gc; -mod range_map; mod shims; // Establish a "crate-wide prelude": we often import `crate::*`. @@ -132,6 +131,8 @@ pub use crate::concurrency::thread::{ ThreadManager, TimeoutAnchor, TimeoutClock, UnblockKind, }; pub use crate::concurrency::{GenmcConfig, GenmcCtx}; +pub use crate::data_structures::dedup_range_map::DedupRangeMap; +pub use crate::data_structures::mono_hash_map::MonoHashMap; pub use crate::diagnostics::{ EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error, }; @@ -145,10 +146,8 @@ pub use crate::machine::{ AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, }; -pub use crate::mono_hash_map::MonoHashMap; pub use crate::operator::EvalContextExt as _; pub use crate::provenance_gc::{EvalContextExt as _, LiveAllocs, VisitProvenance, VisitWith}; -pub use crate::range_map::RangeMap; pub use crate::shims::EmulateItemResult; pub use crate::shims::env::{EnvVars, EvalContextExt as _}; pub use crate::shims::foreign_items::{DynSym, EvalContextExt as _}; From 0d306dd7f43574eede5bfdd883c298a07da0b2cc Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 8 Jul 2025 08:22:53 +0200 Subject: [PATCH 08/24] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 7ca7acb3fd709..a9394819541f1 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -ca98d4d4b3114116203699c2734805547df86f9a +688ea65df6a47866d0f72a00f1e18b47a7edf83b From 3bd0b8909ca743f8089d256d3a4443787f966315 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Wed, 2 Jul 2025 02:55:41 +0200 Subject: [PATCH 09/24] various native-lib-trace-related fixups Co-Authored-By: Ralf Jung --- src/tools/miri/src/alloc/isolated_alloc.rs | 21 +- src/tools/miri/src/bin/miri.rs | 30 +- src/tools/miri/src/lib.rs | 8 +- src/tools/miri/src/shims/mod.rs | 4 +- src/tools/miri/src/shims/native_lib/mod.rs | 96 +++--- .../miri/src/shims/native_lib/trace/child.rs | 104 +++--- .../src/shims/native_lib/trace/messages.rs | 47 +-- .../miri/src/shims/native_lib/trace/mod.rs | 2 + .../miri/src/shims/native_lib/trace/parent.rs | 304 ++++++++---------- .../miri/src/shims/native_lib/trace/stub.rs | 34 ++ 10 files changed, 312 insertions(+), 338 deletions(-) create mode 100644 src/tools/miri/src/shims/native_lib/trace/stub.rs diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index a7bb9b4da7580..7b2f1a3eebf3d 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -302,23 +302,20 @@ impl IsolatedAlloc { } } - /// Returns a vector of page addresses managed by the allocator. - pub fn pages(&self) -> Vec { - let mut pages: Vec = - self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect(); - for (ptr, size) in self.huge_ptrs.iter() { - for i in 0..size / self.page_size { - pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size)); - } - } - pages + /// Returns a list of page addresses managed by the allocator. + pub fn pages(&self) -> impl Iterator { + let pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get()); + pages.chain(self.huge_ptrs.iter().flat_map(|(ptr, size)| { + (0..size / self.page_size) + .map(|i| ptr.expose_provenance().get().strict_add(i * self.page_size)) + })) } /// Protects all owned memory as `PROT_NONE`, preventing accesses. /// /// SAFETY: Accessing memory after this point will result in a segfault /// unless it is first unprotected. - pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> { + pub unsafe fn start_ffi(&mut self) -> Result<(), nix::errno::Errno> { let prot = mman::ProtFlags::PROT_NONE; unsafe { self.mprotect(prot) } } @@ -326,7 +323,7 @@ impl IsolatedAlloc { /// Deprotects all owned memory by setting it to RW. Erroring here is very /// likely unrecoverable, so it may panic if applying those permissions /// fails. - pub fn unprep_ffi(&mut self) { + pub fn end_ffi(&mut self) { let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE; unsafe { self.mprotect(prot).unwrap(); diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index e3b579a4ac61d..61cebedf08104 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -233,8 +233,6 @@ impl rustc_driver::Callbacks for MiriCompilerCalls { } else { let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None) .unwrap_or_else(|| { - //#[cfg(target_os = "linux")] - //miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE); tcx.dcx().abort_if_errors(); rustc_driver::EXIT_FAILURE }); @@ -337,6 +335,9 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls { fn exit(exit_code: i32) -> ! { // Drop the tracing guard before exiting, so tracing calls are flushed correctly. deinit_loggers(); + // Make sure the supervisor knows about the code code. + #[cfg(target_os = "linux")] + miri::native_lib::register_retcode_sv(exit_code); std::process::exit(exit_code); } @@ -355,6 +356,11 @@ fn run_compiler_and_exit( args: &[String], callbacks: &mut (dyn rustc_driver::Callbacks + Send), ) -> ! { + // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if + // MIRI_BE_RUSTC is set. We do this late so that when `native_lib::init_sv` is called, + // there are no other threads. + rustc_driver::install_ctrlc_handler(); + // Invoke compiler, catch any unwinding panics and handle return code. let exit_code = rustc_driver::catch_with_exit_code(move || rustc_driver::run_compiler(args, callbacks)); @@ -439,10 +445,6 @@ fn main() { let args = rustc_driver::catch_fatal_errors(|| rustc_driver::args::raw_args(&early_dcx)) .unwrap_or_else(|_| std::process::exit(rustc_driver::EXIT_FAILURE)); - // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if - // MIRI_BE_RUSTC is set. - rustc_driver::install_ctrlc_handler(); - // If the environment asks us to actually be rustc, then do that. if let Some(crate_kind) = env::var_os("MIRI_BE_RUSTC") { // Earliest rustc setup. @@ -750,15 +752,15 @@ fn main() { debug!("rustc arguments: {:?}", rustc_args); debug!("crate arguments: {:?}", miri_config.args); - #[cfg(target_os = "linux")] if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing { - // FIXME: This should display a diagnostic / warning on error - // SAFETY: If any other threads exist at this point (namely for the ctrlc - // handler), they will not interact with anything on the main rustc/Miri - // thread in an async-signal-unsafe way such as by accessing shared - // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which - // are async-signal-safe, as is accessing atomics - //let _ = unsafe { miri::native_lib::init_sv() }; + // SAFETY: No other threads are running + #[cfg(target_os = "linux")] + if unsafe { miri::native_lib::init_sv() }.is_err() { + eprintln!( + "warning: The native-lib tracer could not be started. Is this an x86 Linux system, and does Miri have permissions to ptrace?\n\ + Falling back to non-tracing native-lib mode." + ); + } } run_compiler_and_exit( &rustc_args, diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 642b7ee984019..374f63a16604f 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -96,10 +96,10 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _}; use rustc_middle::{bug, span_bug}; use tracing::{info, trace}; -//#[cfg(target_os = "linux")] -//pub mod native_lib { -// pub use crate::shims::{init_sv, register_retcode_sv}; -//} +#[cfg(target_os = "linux")] +pub mod native_lib { + pub use crate::shims::{init_sv, register_retcode_sv}; +} // Type aliases that set the provenance parameter. pub type Pointer = interpret::Pointer>; diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index 60c0e289292bd..b08ab101e9479 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -22,8 +22,8 @@ pub mod tls; pub mod unwind; pub use self::files::FdTable; -//#[cfg(target_os = "linux")] -//pub use self::native_lib::trace::{init_sv, register_retcode_sv}; +#[cfg(target_os = "linux")] +pub use self::native_lib::trace::{init_sv, register_retcode_sv}; pub use self::unix::{DirTable, EpollInterestTable}; /// What needs to be done after emulating an item (a shim or an intrinsic) is done. diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs index 9b30d8ce78bf8..d7432a7300e49 100644 --- a/src/tools/miri/src/shims/native_lib/mod.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -1,9 +1,5 @@ //! Implements calling functions from a native library. -// FIXME: disabled since it fails to build on many targets. -//#[cfg(target_os = "linux")] -//pub mod trace; - use std::ops::Deref; use libffi::high::call as ffi; @@ -13,14 +9,55 @@ use rustc_middle::mir::interpret::Pointer; use rustc_middle::ty::{self as ty, IntTy, UintTy}; use rustc_span::Symbol; -//#[cfg(target_os = "linux")] -//use self::trace::Supervisor; +#[cfg_attr( + not(all( + target_os = "linux", + target_env = "gnu", + any(target_arch = "x86", target_arch = "x86_64") + )), + path = "trace/stub.rs" +)] +pub mod trace; + use crate::*; -//#[cfg(target_os = "linux")] -//type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option)>; -//#[cfg(not(target_os = "linux"))] -type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option)>; +/// The final results of an FFI trace, containing every relevant event detected +/// by the tracer. +#[allow(dead_code)] +#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug)] +pub struct MemEvents { + /// An list of memory accesses that occurred, in the order they occurred in. + pub acc_events: Vec, +} + +/// A single memory access. +#[allow(dead_code)] +#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug)] +pub enum AccessEvent { + /// A read may have occurred on this memory range. + /// Some instructions *may* read memory without *always* doing that, + /// so this can be an over-approximation. + /// The range info, however, is reliable if the access did happen. + Read(AccessRange), + /// A read may have occurred on this memory range. + /// Some instructions *may* write memory without *always* doing that, + /// so this can be an over-approximation. + /// The range info, however, is reliable if the access did happen. + Write(AccessRange), +} + +/// The memory touched by a given access. +#[allow(dead_code)] +#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Debug)] +pub struct AccessRange { + /// The base address in memory where an access occurred. + pub addr: usize, + /// The number of bytes affected from the base. + pub size: usize, +} impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { @@ -31,18 +68,17 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { dest: &MPlaceTy<'tcx>, ptr: CodePtr, libffi_args: Vec>, - ) -> CallResult<'tcx> { + ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); - //#[cfg(target_os = "linux")] - //let alloc = this.machine.allocator.as_ref().unwrap(); - - // SAFETY: We don't touch the machine memory past this point. - //#[cfg(target_os = "linux")] - //let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) }; + #[cfg(target_os = "linux")] + let alloc = this.machine.allocator.as_ref().unwrap(); + #[cfg(not(target_os = "linux"))] + // Placeholder value. + let alloc = (); - // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value - // as the specified primitive integer type - let res = 'res: { + trace::Supervisor::do_ffi(alloc, || { + // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value + // as the specified primitive integer type let scalar = match dest.layout.ty.kind() { // ints ty::Int(IntTy::I8) => { @@ -93,7 +129,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // have the output_type `Tuple([])`. ty::Tuple(t_list) if (*t_list).deref().is_empty() => { unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; - break 'res interp_ok(ImmTy::uninit(dest.layout)); + return interp_ok(ImmTy::uninit(dest.layout)); } ty::RawPtr(..) => { let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; @@ -101,23 +137,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { Scalar::from_pointer(ptr, this) } _ => - break 'res Err(err_unsup_format!( + return Err(err_unsup_format!( "unsupported return type for native call: {:?}", link_name )) .into(), }; interp_ok(ImmTy::from_scalar(scalar, dest.layout)) - }; - - // SAFETY: We got the guard and stack pointer from start_ffi, and - // the allocator is the same - //#[cfg(target_os = "linux")] - //let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) }; - //#[cfg(not(target_os = "linux"))] - let events = None; - - interp_ok((res?, events)) + }) } /// Get the pointer to the function of the specified name in the shared object file, @@ -214,10 +241,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { if !this.machine.native_call_mem_warned.replace(true) { // Newly set, so first time we get here. this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { - //#[cfg(target_os = "linux")] - //tracing: self::trace::Supervisor::is_enabled(), - //#[cfg(not(target_os = "linux"))] - tracing: false, + tracing: self::trace::Supervisor::is_enabled(), }); } diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs index 4961e875c7751..de26cb0fe5570 100644 --- a/src/tools/miri/src/shims/native_lib/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -4,12 +4,22 @@ use std::rc::Rc; use ipc_channel::ipc; use nix::sys::{ptrace, signal}; use nix::unistd; +use rustc_const_eval::interpret::InterpResult; use super::CALLBACK_STACK_SIZE; -use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest}; +use super::messages::{Confirmation, StartFfiInfo, TraceRequest}; use super::parent::{ChildListener, sv_loop}; use crate::alloc::isolated_alloc::IsolatedAlloc; +use crate::shims::native_lib::MemEvents; +/// A handle to the single, shared supervisor process across all `MiriMachine`s. +/// Since it would be very difficult to trace multiple FFI calls in parallel, we +/// need to ensure that either (a) only one `MiriMachine` is performing an FFI call +/// at any given time, or (b) there are distinct supervisor and child processes for +/// each machine. The former was chosen here. +/// +/// This should only contain a `None` if the supervisor has not (yet) been initialised; +/// otherwise, if `init_sv` was called and did not error, this will always be nonempty. static SUPERVISOR: std::sync::Mutex> = std::sync::Mutex::new(None); /// The main means of communication between the child and parent process, @@ -34,32 +44,23 @@ impl Supervisor { SUPERVISOR.lock().unwrap().is_some() } - /// Begins preparations for doing an FFI call. This should be called at - /// the last possible moment before entering said call. `alloc` points to - /// the allocator which handed out the memory used for this machine. - /// + /// Performs an arbitrary FFI call, enabling tracing from the supervisor. /// As this locks the supervisor via a mutex, no other threads may enter FFI - /// until this one returns and its guard is dropped via `end_ffi`. The - /// pointer returned should be passed to `end_ffi` to avoid a memory leak. - /// - /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately - /// after the desired call has concluded. - pub unsafe fn start_ffi( + /// until this function returns. + pub fn do_ffi<'tcx>( alloc: &Rc>, - ) -> (std::sync::MutexGuard<'static, Option>, Option<*mut [u8; CALLBACK_STACK_SIZE]>) - { + f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>, + ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let mut sv_guard = SUPERVISOR.lock().unwrap(); - // If the supervisor is not initialised for whatever reason, fast-fail. - // This might be desired behaviour, as even on platforms where ptracing - // is not implemented it enables us to enforce that only one FFI call + // If the supervisor is not initialised for whatever reason, fast-return. + // As a side-effect, even on platforms where ptracing + // is not implemented, we enforce that only one FFI call // happens at a time. - let Some(sv) = sv_guard.take() else { - return (sv_guard, None); - }; + let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) }; // Get pointers to all the pages the supervisor must allow accesses in // and prepare the callback stack. - let page_ptrs = alloc.borrow().pages(); + let page_ptrs = alloc.borrow().pages().collect(); let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] = Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast(); let stack_ptr = raw_stack_ptr.expose_provenance(); @@ -68,9 +69,9 @@ impl Supervisor { // SAFETY: We do not access machine memory past this point until the // supervisor is ready to allow it. unsafe { - if alloc.borrow_mut().prepare_ffi().is_err() { + if alloc.borrow_mut().start_ffi().is_err() { // Don't mess up unwinding by maybe leaving the memory partly protected - alloc.borrow_mut().unprep_ffi(); + alloc.borrow_mut().end_ffi(); panic!("Cannot protect memory for FFI call!"); } } @@ -82,27 +83,13 @@ impl Supervisor { // enforce an ordering for these events. sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); sv.confirm_rx.recv().unwrap(); - *sv_guard = Some(sv); // We need to be stopped for the supervisor to be able to make certain // modifications to our memory - simply waiting on the recv() doesn't // count. signal::raise(signal::SIGSTOP).unwrap(); - (sv_guard, Some(raw_stack_ptr)) - } - /// Undoes FFI-related preparations, allowing Miri to continue as normal, then - /// gets the memory accesses and changes performed during the FFI call. Note - /// that this may include some spurious accesses done by `libffi` itself in - /// the process of executing the function call. - /// - /// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones - /// received by a prior call to `start_ffi`, and the allocator must be the - /// one passed to it also. - pub unsafe fn end_ffi( - alloc: &Rc>, - mut sv_guard: std::sync::MutexGuard<'static, Option>, - raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>, - ) -> Option { + let res = f(); + // We can't use IPC channels here to signal that FFI mode has ended, // since they might allocate memory which could get us stuck in a SIGTRAP // with no easy way out! While this could be worked around, it is much @@ -113,42 +100,40 @@ impl Supervisor { signal::raise(signal::SIGUSR1).unwrap(); // This is safe! It just sets memory to normal expected permissions. - alloc.borrow_mut().unprep_ffi(); + alloc.borrow_mut().end_ffi(); - // If this is `None`, then `raw_stack_ptr` is None and does not need to - // be deallocated (and there's no need to worry about the guard, since - // it contains nothing). - let sv = sv_guard.take()?; // SAFETY: Caller upholds that this pointer was allocated as a box with // this type. unsafe { - drop(Box::from_raw(raw_stack_ptr.unwrap())); + drop(Box::from_raw(raw_stack_ptr)); } // On the off-chance something really weird happens, don't block forever. - let ret = sv + let events = sv .event_rx .try_recv_timeout(std::time::Duration::from_secs(5)) .map_err(|e| { match e { ipc::TryRecvError::IpcError(_) => (), ipc::TryRecvError::Empty => - eprintln!("Waiting for accesses from supervisor timed out!"), + panic!("Waiting for accesses from supervisor timed out!"), } }) .ok(); - // Do *not* leave the supervisor empty, or else we might get another fork... - *sv_guard = Some(sv); - ret + + res.map(|v| (v, events)) } } /// Initialises the supervisor process. If this function errors, then the /// supervisor process could not be created successfully; else, the caller -/// is now the child process and can communicate via `start_ffi`/`end_ffi`, -/// receiving back events through `get_events`. +/// is now the child process and can communicate via `do_ffi`, receiving back +/// events at the end. /// /// # Safety -/// The invariants for `fork()` must be upheld by the caller. +/// The invariants for `fork()` must be upheld by the caller, namely either: +/// - Other threads do not exist, or; +/// - If they do exist, either those threads or the resulting child process +/// only ever act in [async-signal-safe](https://www.man7.org/linux/man-pages/man7/signal-safety.7.html) ways. pub unsafe fn init_sv() -> Result<(), SvInitError> { // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the // relevant missing bits. @@ -191,8 +176,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { // The child process is free to unwind, so we won't to avoid doubly freeing // system resources. let init = std::panic::catch_unwind(|| { - let listener = - ChildListener { message_rx, attached: false, override_retcode: None }; + let listener = ChildListener::new(message_rx, confirm_tx.clone()); // Trace as many things as possible, to be able to handle them as needed. let options = ptrace::Options::PTRACE_O_TRACESYSGOOD | ptrace::Options::PTRACE_O_TRACECLONE @@ -218,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { // The "Ok" case means that we couldn't ptrace. Ok(e) => return Err(e), Err(p) => { - eprintln!("Supervisor process panicked!\n{p:?}"); + eprintln!( + "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer." + ); std::process::exit(1); } } @@ -239,13 +225,11 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { } /// Instruct the supervisor process to return a particular code. Useful if for -/// whatever reason this code fails to be intercepted normally. In the case of -/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously -/// given as a 0 if this is not used. +/// whatever reason this code fails to be intercepted normally. pub fn register_retcode_sv(code: i32) { let mut sv_guard = SUPERVISOR.lock().unwrap(); - if let Some(sv) = sv_guard.take() { + if let Some(sv) = sv_guard.as_mut() { sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap(); - *sv_guard = Some(sv); + sv.confirm_rx.recv().unwrap(); } } diff --git a/src/tools/miri/src/shims/native_lib/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs index 8a83dab5c09d1..1f9df556b577f 100644 --- a/src/tools/miri/src/shims/native_lib/trace/messages.rs +++ b/src/tools/miri/src/shims/native_lib/trace/messages.rs @@ -1,25 +1,28 @@ //! Houses the types that are directly sent across the IPC channels. //! -//! The overall structure of a traced FFI call, from the child process's POV, is -//! as follows: +//! When forking to initialise the supervisor during `init_sv`, the child raises +//! a `SIGSTOP`; if the parent successfully ptraces the child, it will allow it +//! to resume. Else, the child will be killed by the parent. +//! +//! After initialisation is done, the overall structure of a traced FFI call from +//! the child process's POV is as follows: //! ``` //! message_tx.send(TraceRequest::StartFfi); -//! confirm_rx.recv(); +//! confirm_rx.recv(); // receives a `Confirmation` //! raise(SIGSTOP); //! /* do ffi call */ //! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi" -//! let events = event_rx.recv(); +//! let events = event_rx.recv(); // receives a `MemEvents` //! ``` //! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including -//! before or after all of them. +//! before or after all of them. `confirm_rx.recv()` is to be called after, to ensure +//! that the child does not exit before the supervisor has registered the return code. //! //! NB: sending these events out of order, skipping steps, etc. will result in //! unspecified behaviour from the supervisor process, so use the abstractions -//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is +//! in `super::child` (namely `do_ffi()`) to handle this. It is //! trivially easy to cause a deadlock or crash by messing this up! -use std::ops::Range; - /// An IPC request sent by the child process to the parent. /// /// The sender for this channel should live on the child process. @@ -34,6 +37,8 @@ pub enum TraceRequest { StartFfi(StartFfiInfo), /// Manually overrides the code that the supervisor will return upon exiting. /// Once set, it is permanent. This can be called again to change the value. + /// + /// After sending this, the child must wait to receive a `Confirmation`. OverrideRetcode(i32), } @@ -41,7 +46,7 @@ pub enum TraceRequest { #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] pub struct StartFfiInfo { /// A vector of page addresses. These should have been automatically obtained - /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. + /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::start_ffi`. pub page_ptrs: Vec, /// The address of an allocation that can serve as a temporary stack. /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. @@ -54,27 +59,3 @@ pub struct StartFfiInfo { /// The sender for this channel should live on the parent process. #[derive(serde::Serialize, serde::Deserialize, Debug)] pub struct Confirmation; - -/// The final results of an FFI trace, containing every relevant event detected -/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal. -/// -/// The sender for this channel should live on the parent process. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub struct MemEvents { - /// An ordered list of memory accesses that occurred. These should be assumed - /// to be overcautious; that is, if the size of an access is uncertain it is - /// pessimistically rounded up, and if the type (read/write/both) is uncertain - /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write - /// becomes a read + write, etc. - pub acc_events: Vec, -} - -/// A single memory access, conservatively overestimated -/// in case of ambiguity. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub enum AccessEvent { - /// A read may have occurred on no more than the specified address range. - Read(Range), - /// A write may have occurred on no more than the specified address range. - Write(Range), -} diff --git a/src/tools/miri/src/shims/native_lib/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs index 174b06b3ac56c..c8abacfb5e222 100644 --- a/src/tools/miri/src/shims/native_lib/trace/mod.rs +++ b/src/tools/miri/src/shims/native_lib/trace/mod.rs @@ -5,4 +5,6 @@ mod parent; pub use self::child::{Supervisor, init_sv, register_retcode_sv}; /// The size of the temporary stack we use for callbacks that the server executes in the client. +/// This should be big enough that `mempr_on` and `mempr_off` can safely be jumped into with the +/// stack pointer pointing to a "stack" of this size without overflowing it. const CALLBACK_STACK_SIZE: usize = 1024; diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index dfb0b35da699c..fc4047eec51d3 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -5,26 +5,17 @@ use nix::sys::{ptrace, signal, wait}; use nix::unistd; use super::CALLBACK_STACK_SIZE; -use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest}; +use super::messages::{Confirmation, StartFfiInfo, TraceRequest}; +use crate::shims::native_lib::{AccessEvent, AccessRange, MemEvents}; /// The flags to use when calling `waitid()`. -/// Since bitwise or on the nix version of these flags is implemented as a trait, -/// this cannot be const directly so we do it this way. const WAIT_FLAGS: wait::WaitPidFlag = - wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); - -/// Arch-specific maximum size a single access might perform. x86 value is set -/// assuming nothing bigger than AVX-512 is available. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -const ARCH_MAX_ACCESS_SIZE: usize = 64; -/// The largest arm64 simd instruction operates on 16 bytes. -#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] -const ARCH_MAX_ACCESS_SIZE: usize = 16; + wait::WaitPidFlag::WUNTRACED.union(wait::WaitPidFlag::WEXITED); /// The default word size on a given platform, in bytes. -#[cfg(any(target_arch = "x86", target_arch = "arm"))] +#[cfg(target_arch = "x86")] const ARCH_WORD_SIZE: usize = 4; -#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(target_arch = "x86_64")] const ARCH_WORD_SIZE: usize = 8; /// The address of the page set to be edited, initialised to a sentinel null @@ -53,39 +44,25 @@ trait ArchIndependentRegs { // It's fine / desirable behaviour for values to wrap here, we care about just // preserving the bit pattern. #[cfg(target_arch = "x86_64")] -#[expect(clippy::as_conversions)] #[rustfmt::skip] impl ArchIndependentRegs for libc::user_regs_struct { #[inline] - fn ip(&self) -> usize { self.rip as _ } + fn ip(&self) -> usize { self.rip.try_into().unwrap() } #[inline] - fn set_ip(&mut self, ip: usize) { self.rip = ip as _ } + fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() } #[inline] - fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ } + fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() } } #[cfg(target_arch = "x86")] -#[expect(clippy::as_conversions)] #[rustfmt::skip] impl ArchIndependentRegs for libc::user_regs_struct { #[inline] - fn ip(&self) -> usize { self.eip as _ } + fn ip(&self) -> usize { self.eip.try_into().unwrap() } #[inline] - fn set_ip(&mut self, ip: usize) { self.eip = ip as _ } + fn set_ip(&mut self, ip: usize) { self.eip = ip.try_into().unwrap() } #[inline] - fn set_sp(&mut self, sp: usize) { self.esp = sp as _ } -} - -#[cfg(target_arch = "aarch64")] -#[expect(clippy::as_conversions)] -#[rustfmt::skip] -impl ArchIndependentRegs for libc::user_regs_struct { - #[inline] - fn ip(&self) -> usize { self.pc as _ } - #[inline] - fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } - #[inline] - fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } + fn set_sp(&mut self, sp: usize) { self.esp = sp.try_into().unwrap() } } /// A unified event representing something happening on the child process. Wraps @@ -109,11 +86,24 @@ pub enum ExecEvent { /// A listener for the FFI start info channel along with relevant state. pub struct ChildListener { /// The matching channel for the child's `Supervisor` struct. - pub message_rx: ipc::IpcReceiver, + message_rx: ipc::IpcReceiver, + /// ... + confirm_tx: ipc::IpcSender, /// Whether an FFI call is currently ongoing. - pub attached: bool, + attached: bool, /// If `Some`, overrides the return code with the given value. - pub override_retcode: Option, + override_retcode: Option, + /// Last code obtained from a child exiting. + last_code: Option, +} + +impl ChildListener { + pub fn new( + message_rx: ipc::IpcReceiver, + confirm_tx: ipc::IpcSender, + ) -> Self { + Self { message_rx, confirm_tx, attached: false, override_retcode: None, last_code: None } + } } impl Iterator for ChildListener { @@ -133,16 +123,10 @@ impl Iterator for ChildListener { Ok(stat) => match stat { // Child exited normally with a specific code set. - wait::WaitStatus::Exited(_, code) => { - let code = self.override_retcode.unwrap_or(code); - return Some(ExecEvent::Died(Some(code))); - } + wait::WaitStatus::Exited(_, code) => self.last_code = Some(code), // Child was killed by a signal, without giving a code. - wait::WaitStatus::Signaled(_, _, _) => - return Some(ExecEvent::Died(self.override_retcode)), - // Child entered a syscall. Since we're always technically - // tracing, only pass this along if we're actively - // monitoring the child. + wait::WaitStatus::Signaled(_, _, _) => self.last_code = None, + // Child entered or exited a syscall. wait::WaitStatus::PtraceSyscall(pid) => if self.attached { return Some(ExecEvent::Syscall(pid)); @@ -179,10 +163,8 @@ impl Iterator for ChildListener { }, _ => (), }, - // This case should only trigger if all children died and we - // somehow missed that, but it's best we not allow any room - // for deadlocks. - Err(_) => return Some(ExecEvent::Died(None)), + // This case should only trigger when all children died. + Err(_) => return Some(ExecEvent::Died(self.override_retcode.or(self.last_code))), } // Similarly, do a non-blocking poll of the IPC channel. @@ -196,7 +178,10 @@ impl Iterator for ChildListener { self.attached = true; return Some(ExecEvent::Start(info)); }, - TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code), + TraceRequest::OverrideRetcode(code) => { + self.override_retcode = Some(code); + self.confirm_tx.send(Confirmation).unwrap(); + } } } @@ -211,6 +196,12 @@ impl Iterator for ChildListener { #[derive(Debug)] pub struct ExecEnd(pub Option); +/// Whether to call `ptrace::cont()` immediately. Used exclusively by `wait_for_signal`. +enum InitialCont { + Yes, + No, +} + /// This is the main loop of the supervisor process. It runs in a separate /// process from the rest of Miri (but because we fork, addresses for anything /// created before the fork - like statics - are the same). @@ -239,12 +230,12 @@ pub fn sv_loop( let mut curr_pid = init_pid; // There's an initial sigstop we need to deal with. - wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?; + wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?; ptrace::cont(curr_pid, None).unwrap(); for evt in listener { match evt { - // start_ffi was called by the child, so prep memory. + // Child started ffi, so prep memory. ExecEvent::Start(ch_info) => { // All the pages that the child process is "allowed to" access. ch_pages = ch_info.page_ptrs; @@ -252,17 +243,17 @@ pub fn sv_loop( ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, - // so we can let the child move on to the end of start_ffi where it will + // so we can let the child move on to the end of the ffi prep where it will // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); // We can't trust simply calling `Pid::this()` in the child process to give the right // PID for us, so we get it this way. - curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); + curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap(); ptrace::syscall(curr_pid, None).unwrap(); } - // end_ffi was called by the child. + // Child wants to end tracing. ExecEvent::End => { // Hand over the access info we traced. event_tx.send(MemEvents { acc_events }).unwrap(); @@ -322,10 +313,6 @@ fn get_disasm() -> capstone::Capstone { {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)} #[cfg(target_arch = "x86")] {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)} - #[cfg(target_arch = "aarch64")] - {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)} - #[cfg(target_arch = "arm")] - {cs_pre.arm().mode(arch::arm::ArchMode::Arm)} } .detail(true) .build() @@ -339,9 +326,9 @@ fn get_disasm() -> capstone::Capstone { fn wait_for_signal( pid: Option, wait_signal: signal::Signal, - init_cont: bool, + init_cont: InitialCont, ) -> Result { - if init_cont { + if matches!(init_cont, InitialCont::Yes) { ptrace::cont(pid.unwrap(), None).unwrap(); } // Repeatedly call `waitid` until we get the signal we want, or the process dies. @@ -374,6 +361,74 @@ fn wait_for_signal( } } +/// Add the memory events from `op` being executed while there is a memory access at `addr` to +/// `acc_events`. Return whether this was a memory operand. +fn capstone_find_events( + addr: usize, + op: &capstone::arch::ArchOperand, + acc_events: &mut Vec, +) -> bool { + use capstone::prelude::*; + match op { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + arch::ArchOperand::X86Operand(x86_operand) => { + match x86_operand.op_type { + // We only care about memory accesses + arch::x86::X86OperandType::Mem(_) => { + let push = AccessRange { addr, size: x86_operand.size.into() }; + // It's called a "RegAccessType" but it also applies to memory + let acc_ty = x86_operand.access.unwrap(); + // The same instruction might do both reads and writes, so potentially add both. + // We do not know the order in which they happened, but writing and then reading + // makes little sense so we put the read first. That is also the more + // conservative choice. + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + acc_events.push(AccessEvent::Write(push)); + } + + return true; + } + _ => (), + } + } + // FIXME: arm64 + _ => unimplemented!(), + } + + false +} + +/// Extract the events from the given instruction. +fn capstone_disassemble( + instr: &[u8], + addr: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec, +) -> capstone::CsResult<()> { + // The arch_detail is what we care about, but it relies on these temporaries + // that we can't drop. 0x1000 is the default base address for Captsone, and + // we're expecting 1 instruction. + let insns = cs.disasm_count(instr, 0x1000, 1)?; + let ins_detail = cs.insn_detail(&insns[0])?; + let arch_detail = ins_detail.arch_detail(); + + let mut found_mem_op = false; + + for op in arch_detail.operands() { + if capstone_find_events(addr, &op, acc_events) { + if found_mem_op { + panic!("more than one memory operand found; we don't know which one accessed what"); + } + found_mem_op = true; + } + } + + Ok(()) +} + /// Grabs the access that caused a segfault and logs it down if it's to our memory, /// or kills the child and returns the appropriate error otherwise. fn handle_segfault( @@ -384,111 +439,6 @@ fn handle_segfault( cs: &capstone::Capstone, acc_events: &mut Vec, ) -> Result<(), ExecEnd> { - /// This is just here to not pollute the main namespace with `capstone::prelude::*`. - #[inline] - fn capstone_disassemble( - instr: &[u8], - addr: usize, - cs: &capstone::Capstone, - acc_events: &mut Vec, - ) -> capstone::CsResult<()> { - use capstone::prelude::*; - - // The arch_detail is what we care about, but it relies on these temporaries - // that we can't drop. 0x1000 is the default base address for Captsone, and - // we're expecting 1 instruction. - let insns = cs.disasm_count(instr, 0x1000, 1)?; - let ins_detail = cs.insn_detail(&insns[0])?; - let arch_detail = ins_detail.arch_detail(); - - for op in arch_detail.operands() { - match op { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - arch::ArchOperand::X86Operand(x86_operand) => { - match x86_operand.op_type { - // We only care about memory accesses - arch::x86::X86OperandType::Mem(_) => { - let push = addr..addr.strict_add(usize::from(x86_operand.size)); - // It's called a "RegAccessType" but it also applies to memory - let acc_ty = x86_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - } - } - #[cfg(target_arch = "aarch64")] - arch::ArchOperand::Arm64Operand(arm64_operand) => { - // Annoyingly, we don't always get the size here, so just be pessimistic for now. - match arm64_operand.op_type { - arch::arm64::Arm64OperandType::Mem(_) => { - // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes. - let size = match arm64_operand.vas { - // Not an fp/simd instruction. - arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, - // 1 byte. - arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, - // 2 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, - // 4 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_4B - | arch::arm64::Arm64Vas::ARM64_VAS_2H - | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, - // 8 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_8B - | arch::arm64::Arm64Vas::ARM64_VAS_4H - | arch::arm64::Arm64Vas::ARM64_VAS_2S - | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, - // 16 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_16B - | arch::arm64::Arm64Vas::ARM64_VAS_8H - | arch::arm64::Arm64Vas::ARM64_VAS_4S - | arch::arm64::Arm64Vas::ARM64_VAS_2D - | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16, - }; - let push = addr..addr.strict_add(size); - // FIXME: This now has access type info in the latest - // git version of capstone because this pissed me off - // and I added it. Change this when it updates. - acc_events.push(AccessEvent::Read(push.clone())); - acc_events.push(AccessEvent::Write(push)); - } - _ => (), - } - } - #[cfg(target_arch = "arm")] - arch::ArchOperand::ArmOperand(arm_operand) => - match arm_operand.op_type { - arch::arm::ArmOperandType::Mem(_) => { - // We don't get info on the size of the access, but - // we're at least told if it's a vector instruction. - let size = if arm_operand.vector_index.is_some() { - ARCH_MAX_ACCESS_SIZE - } else { - ARCH_WORD_SIZE - }; - let push = addr..addr.strict_add(size); - let acc_ty = arm_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - }, - _ => unimplemented!(), - } - } - - Ok(()) - } - // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); @@ -515,7 +465,7 @@ fn handle_segfault( // global atomic variables. This is what we use the temporary callback stack for. // - Step 1 instruction // - Parse executed code to estimate size & type of access - // - Reprotect the memory by executing `mempr_on` in the child. + // - Reprotect the memory by executing `mempr_on` in the child, using the callback stack again. // - Continue // Ensure the stack is properly zeroed out! @@ -552,7 +502,7 @@ fn handle_segfault( ptrace::setregs(pid, new_regs).unwrap(); // Our mempr_* functions end with a raise(SIGSTOP). - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; // Step 1 instruction. ptrace::setregs(pid, regs_bak).unwrap(); @@ -573,6 +523,12 @@ fn handle_segfault( let regs_bak = ptrace::getregs(pid).unwrap(); new_regs = regs_bak; let ip_poststep = regs_bak.ip(); + + // Ensure that we've actually gone forwards. + assert!(ip_poststep > ip_prestep); + // But not by too much. 64 bytes should be "big enough" on ~any architecture. + assert!(ip_prestep.strict_add(64) > ip_poststep); + // We need to do reads/writes in word-sized chunks. let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { @@ -587,20 +543,14 @@ fn handle_segfault( }); // Now figure out the size + type of access and log it down. - // This will mark down e.g. the same area being read multiple times, - // since it's more efficient to compress the accesses at the end. - if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { - // Read goes first because we need to be pessimistic. - acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - } + capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction"); // Reprotect everything and continue. #[expect(clippy::as_conversions)] new_regs.set_ip(mempr_on as usize); new_regs.set_sp(stack_ptr); ptrace::setregs(pid, new_regs).unwrap(); - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; ptrace::setregs(pid, regs_bak).unwrap(); ptrace::syscall(pid, None).unwrap(); diff --git a/src/tools/miri/src/shims/native_lib/trace/stub.rs b/src/tools/miri/src/shims/native_lib/trace/stub.rs new file mode 100644 index 0000000000000..22787a6f6fa78 --- /dev/null +++ b/src/tools/miri/src/shims/native_lib/trace/stub.rs @@ -0,0 +1,34 @@ +use rustc_const_eval::interpret::InterpResult; + +static SUPERVISOR: std::sync::Mutex<()> = std::sync::Mutex::new(()); + +pub struct Supervisor; + +#[derive(Debug)] +pub struct SvInitError; + +impl Supervisor { + #[inline(always)] + pub fn is_enabled() -> bool { + false + } + + pub fn do_ffi<'tcx, T>( + _: T, + f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>, + ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { + // We acquire the lock to ensure that no two FFI calls run concurrently. + let _g = SUPERVISOR.lock().unwrap(); + f().map(|v| (v, None)) + } +} + +#[inline(always)] +#[allow(dead_code, clippy::missing_safety_doc)] +pub unsafe fn init_sv() -> Result { + Err(SvInitError) +} + +#[inline(always)] +#[allow(dead_code)] +pub fn register_retcode_sv(_: T) {} From e726c643e8319ee5eb2a8be45eb65f7600be2d5d Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Thu, 10 Jul 2025 04:58:14 +0000 Subject: [PATCH 10/24] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index a9394819541f1..0af6b8c6fc794 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -688ea65df6a47866d0f72a00f1e18b47a7edf83b +32cd9114712a24010b0583624dc52ac302194128 From 5e940f581008dbc9b84aa1a8672a3c0a940da7be Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Thu, 10 Jul 2025 05:06:47 +0000 Subject: [PATCH 11/24] fmt --- src/tools/miri/src/machine.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 35399dbf4cb0c..7c4d394525bd3 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1828,7 +1828,9 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan { #[cfg(feature = "tracing")] - { span().entered() } + { + span().entered() + } #[cfg(not(feature = "tracing"))] { let _ = span; // so we avoid the "unused variable" warning From 17b240e04f8d14e5563b6afb2ed3b842f87d1c78 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 10 Jul 2025 08:33:05 +0200 Subject: [PATCH 12/24] silence clippy --- src/tools/miri/src/machine.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 7c4d394525bd3..9c077e5b7b6e9 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1832,6 +1832,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { span().entered() } #[cfg(not(feature = "tracing"))] + #[expect(clippy::unused_unit)] { let _ = span; // so we avoid the "unused variable" warning () From bccc32bf791d7a0f5ccb0f345223548b9ae98844 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 12 Jul 2025 08:55:17 +0200 Subject: [PATCH 13/24] readme: update strict provenance link --- src/tools/miri/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index b05acff72b50a..7816ce1ac5619 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -342,9 +342,9 @@ environment variable. We first document the most relevant and most commonly used is enabled (the default), this is also used to emulate system entropy. The default seed is 0. You can increase test coverage by running Miri multiple times with different seeds. * `-Zmiri-strict-provenance` enables [strict - provenance](https://github.com/rust-lang/rust/issues/95228) checking in Miri. This means that - casting an integer to a pointer will stop execution because the provenance of the pointer - cannot be determined. + provenance](https://doc.rust-lang.org/nightly/std/ptr/index.html#strict-provenance) checking in + Miri. This means that casting an integer to a pointer will stop execution because the provenance + of the pointer cannot be determined. * `-Zmiri-symbolic-alignment-check` makes the alignment check more strict. By default, alignment is checked by casting the pointer to an integer, and making sure that is a multiple of the alignment. This can lead to cases where a program passes the alignment check by pure chance, because things From dbeb05cd2addf012e6c1e1ddea00a5e6fb8ca609 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Mon, 14 Jul 2025 05:00:14 +0000 Subject: [PATCH 14/24] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 0af6b8c6fc794..adf3f7389c68a 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -32cd9114712a24010b0583624dc52ac302194128 +9c3064e131f4939cc95a29bb11413c49bbda1491 From a9a5b33c2d7d5d1ada0efd23f97cc1f3b774dbda Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Tue, 15 Jul 2025 04:59:27 +0000 Subject: [PATCH 15/24] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index adf3f7389c68a..67f27e7aa2c71 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -9c3064e131f4939cc95a29bb11413c49bbda1491 +7f2065a4bae1faed5bab928c670964eafbf43b55 From e1d35c551fde82668d7e8be8f759bd02e9b5345b Mon Sep 17 00:00:00 2001 From: Patrick-6 Date: Tue, 15 Jul 2025 10:32:19 +0200 Subject: [PATCH 16/24] Add std::hint::spin_loop() --- .../miri/tests/pass/0weak_memory_consistency.rs | 12 ++++++++++-- .../miri/tests/pass/0weak_memory_consistency_sc.rs | 12 ++++++++++-- src/tools/miri/tests/pass/weak_memory/weak.rs | 4 ++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency.rs b/src/tools/miri/tests/pass/0weak_memory_consistency.rs index b33aefaf1d599..5fba54934421c 100644 --- a/src/tools/miri/tests/pass/0weak_memory_consistency.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency.rs @@ -48,6 +48,14 @@ fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { val } +/// Spins until it acquires a pre-determined boolean. +fn loads_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { + while loc.load(ord) != val { + std::hint::spin_loop(); + } + val +} + fn test_corr() { let x = static_atomic(0); let y = static_atomic(0); @@ -216,12 +224,12 @@ fn test_sync_through_rmw_and_fences() { let go = static_atomic_bool(false); let t1 = spawn(move || { - while !go.load(Relaxed) {} + loads_bool(go, Relaxed, true); rdmw(y, x, z) }); let t2 = spawn(move || { - while !go.load(Relaxed) {} + loads_bool(go, Relaxed, true); rdmw(z, x, y) }); diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs index 45cc5e6e72214..2c9d742a8b115 100644 --- a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs @@ -27,6 +27,14 @@ fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { val } +/// Spins until it acquires a pre-determined boolean. +fn loads_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { + while loc.load(ord) != val { + std::hint::spin_loop(); + } + val +} + // Test case SB taken from Repairing Sequential Consistency in C/C++11 // by Lahav et al. // https://plv.mpi-sws.org/scfix/paper.pdf @@ -60,11 +68,11 @@ fn test_iriw_sc_rlx() { let a = spawn(move || x.store(true, Relaxed)); let b = spawn(move || y.store(true, Relaxed)); let c = spawn(move || { - while !x.load(SeqCst) {} + loads_bool(x, SeqCst, true); y.load(SeqCst) }); let d = spawn(move || { - while !y.load(SeqCst) {} + loads_bool(y, SeqCst, true); x.load(SeqCst) }); diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs index eeab4ebf129e8..569a307ab064c 100644 --- a/src/tools/miri/tests/pass/weak_memory/weak.rs +++ b/src/tools/miri/tests/pass/weak_memory/weak.rs @@ -119,12 +119,12 @@ fn faa_replaced_by_load() -> bool { let go = static_atomic(0); let t1 = spawn(move || { - while go.load(Relaxed) == 0 {} + reads_value(go, 1); rdmw(y, x, z) }); let t2 = spawn(move || { - while go.load(Relaxed) == 0 {} + reads_value(go, 1); rdmw(z, x, y) }); From 6b51eee9df2622c790cb7ff26176c9be7672ef9c Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 7 Jul 2025 11:10:51 +0200 Subject: [PATCH 17/24] Add enter_trace_span! that checks #[cfg("tracing")] Includes a custom syntax shortand to enter_trace_span! with NAME::SUBNAME. MaybeEnteredTraceSpan is `pub use`d in lib.rs to make it available also in bin/, just in case. --- src/tools/miri/src/helpers.rs | 41 +++++++++++++++++++++++++++++++++++ src/tools/miri/src/lib.rs | 4 +++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index b259243602eeb..b76d046d1d222 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -1431,3 +1431,44 @@ impl ToU64 for usize { self.try_into().unwrap() } } + +/// This struct is needed to enforce `#[must_use]` on values produced by [enter_trace_span] even +/// when the "tracing" feature is not enabled. +#[must_use] +pub struct MaybeEnteredTraceSpan { + #[cfg(feature = "tracing")] + pub _entered_span: tracing::span::EnteredSpan, +} + +/// Enters a [tracing::info_span] only if the "tracing" feature is enabled, otherwise does nothing. +/// This is like [rustc_const_eval::enter_trace_span] except that it does not depend on the +/// [Machine] trait to check if tracing is enabled, because from the Miri codebase we can directly +/// check whether the "tracing" feature is enabled, unlike from the rustc_const_eval codebase. +/// +/// In addition to the syntax accepted by [tracing::span!], this macro optionally allows passing +/// the span name (i.e. the first macro argument) in the form `NAME::SUBNAME` (without quotes) to +/// indicate that the span has name "NAME" (usually the name of the component) and has an additional +/// more specific name "SUBNAME" (usually the function name). The latter is passed to the [tracing] +/// infrastructure as a span field with the name "NAME". This allows not being distracted by +/// subnames when looking at the trace in , but when deeper introspection +/// is needed within a component, it's still possible to view the subnames directly in the UI by +/// selecting a span, clicking on the "NAME" argument on the right, and clicking on "Visualize +/// argument values". +/// ```rust +/// // for example, the first will expand to the second +/// enter_trace_span!(borrow_tracker::on_stack_pop, /* ... */) +/// enter_trace_span!("borrow_tracker", borrow_tracker = "on_stack_pop", /* ... */) +/// ``` +#[macro_export] +macro_rules! enter_trace_span { + ($name:ident :: $subname:ident $($tt:tt)*) => {{ + enter_trace_span!(stringify!($name), $name = %stringify!(subname) $($tt)*) + }}; + + ($($tt:tt)*) => { + $crate::MaybeEnteredTraceSpan { + #[cfg(feature = "tracing")] + _entered_span: tracing::info_span!($($tt)*).entered() + } + }; +} diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index ca99d69b32d0a..f6d01831a9725 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -139,7 +139,9 @@ pub use crate::eval::{ AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith, ValidationMode, create_ecx, eval_entry, }; -pub use crate::helpers::{AccessKind, EvalContextExt as _, ToU64 as _, ToUsize as _}; +pub use crate::helpers::{ + AccessKind, EvalContextExt as _, MaybeEnteredTraceSpan, ToU64 as _, ToUsize as _, +}; pub use crate::intrinsics::EvalContextExt as _; pub use crate::machine::{ AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx, From 2ee58dc31fe5f3f60b9ad56dd85900d02f70db4a Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 7 Jul 2025 11:11:15 +0200 Subject: [PATCH 18/24] Add tracing calls to borrow tracker under the same name ... but the function name is specified in the arguments, see https://github.com/rust-lang/miri/pull/4452#discussion_r2204958019 --- src/tools/miri/src/borrow_tracker/mod.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs index 36c61053a3205..ec6c2c60ca9c6 100644 --- a/src/tools/miri/src/borrow_tracker/mod.rs +++ b/src/tools/miri/src/borrow_tracker/mod.rs @@ -260,6 +260,7 @@ impl GlobalStateInner { kind: MemoryKind, machine: &MiriMachine<'_>, ) -> AllocState { + let _span = enter_trace_span!(borrow_tracker::new_allocation, ?id, ?alloc_size, ?kind); match self.borrow_tracker_method { BorrowTrackerMethod::StackedBorrows => AllocState::StackedBorrows(Box::new(RefCell::new(Stacks::new_allocation( @@ -280,6 +281,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { kind: RetagKind, val: &ImmTy<'tcx>, ) -> InterpResult<'tcx, ImmTy<'tcx>> { + let _span = enter_trace_span!(borrow_tracker::retag_ptr_value, ?kind, ?val.layout); let this = self.eval_context_mut(); let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; match method { @@ -293,6 +295,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { kind: RetagKind, place: &PlaceTy<'tcx>, ) -> InterpResult<'tcx> { + let _span = enter_trace_span!(borrow_tracker::retag_place_contents, ?kind, ?place); let this = self.eval_context_mut(); let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; match method { @@ -302,6 +305,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } fn protect_place(&mut self, place: &MPlaceTy<'tcx>) -> InterpResult<'tcx, MPlaceTy<'tcx>> { + let _span = enter_trace_span!(borrow_tracker::protect_place, ?place); let this = self.eval_context_mut(); let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; match method { @@ -311,6 +315,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> { + let _span = + enter_trace_span!(borrow_tracker::expose_tag, alloc_id = alloc_id.0, tag = tag.0); let this = self.eval_context_ref(); let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; match method { @@ -354,6 +360,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { &self, frame: &Frame<'tcx, Provenance, FrameExtra<'tcx>>, ) -> InterpResult<'tcx> { + let _span = enter_trace_span!(borrow_tracker::on_stack_pop); let this = self.eval_context_ref(); let borrow_tracker = this.machine.borrow_tracker.as_ref().unwrap(); // The body of this loop needs `borrow_tracker` immutably @@ -431,6 +438,7 @@ impl AllocState { range: AllocRange, machine: &MiriMachine<'tcx>, ) -> InterpResult<'tcx> { + let _span = enter_trace_span!(borrow_tracker::before_memory_read, alloc_id = alloc_id.0); match self { AllocState::StackedBorrows(sb) => sb.borrow_mut().before_memory_read(alloc_id, prov_extra, range, machine), @@ -452,6 +460,7 @@ impl AllocState { range: AllocRange, machine: &MiriMachine<'tcx>, ) -> InterpResult<'tcx> { + let _span = enter_trace_span!(borrow_tracker::before_memory_write, alloc_id = alloc_id.0); match self { AllocState::StackedBorrows(sb) => sb.get_mut().before_memory_write(alloc_id, prov_extra, range, machine), @@ -473,6 +482,8 @@ impl AllocState { size: Size, machine: &MiriMachine<'tcx>, ) -> InterpResult<'tcx> { + let _span = + enter_trace_span!(borrow_tracker::before_memory_deallocation, alloc_id = alloc_id.0); match self { AllocState::StackedBorrows(sb) => sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), @@ -482,6 +493,7 @@ impl AllocState { } pub fn remove_unreachable_tags(&self, tags: &FxHashSet) { + let _span = enter_trace_span!(borrow_tracker::remove_unreachable_tags); match self { AllocState::StackedBorrows(sb) => sb.borrow_mut().remove_unreachable_tags(tags), AllocState::TreeBorrows(tb) => tb.borrow_mut().remove_unreachable_tags(tags), @@ -496,6 +508,11 @@ impl AllocState { tag: BorTag, alloc_id: AllocId, // diagnostics ) -> InterpResult<'tcx> { + let _span = enter_trace_span!( + borrow_tracker::release_protector, + alloc_id = alloc_id.0, + tag = tag.0 + ); match self { AllocState::StackedBorrows(_sb) => interp_ok(()), AllocState::TreeBorrows(tb) => @@ -506,6 +523,7 @@ impl AllocState { impl VisitProvenance for AllocState { fn visit_provenance(&self, visit: &mut VisitWith<'_>) { + let _span = enter_trace_span!(borrow_tracker::visit_provenance); match self { AllocState::StackedBorrows(sb) => sb.visit_provenance(visit), AllocState::TreeBorrows(tb) => tb.visit_provenance(visit), From 9a76dde4d23371d2deef39c56ff9f22051c884f7 Mon Sep 17 00:00:00 2001 From: Patrick-6 Date: Tue, 15 Jul 2025 15:50:02 +0200 Subject: [PATCH 19/24] Make spin function naming more consistent --- .../miri/tests/pass/0weak_memory_consistency.rs | 16 ++++++++-------- .../tests/pass/0weak_memory_consistency_sc.rs | 10 +++++----- src/tools/miri/tests/pass/weak_memory/weak.rs | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency.rs b/src/tools/miri/tests/pass/0weak_memory_consistency.rs index 5fba54934421c..e4ed9675de822 100644 --- a/src/tools/miri/tests/pass/0weak_memory_consistency.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency.rs @@ -41,7 +41,7 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool { } /// Spins until it acquires a pre-determined value. -fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { +fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -49,7 +49,7 @@ fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { } /// Spins until it acquires a pre-determined boolean. -fn loads_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { +fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -73,7 +73,7 @@ fn test_corr() { }); // | | #[rustfmt::skip] // |synchronizes-with |happens-before let j3 = spawn(move || { // | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | x.load(Relaxed) // <----------------------------------------------+ // The two reads on x are ordered by hb, so they cannot observe values // differently from the modification order. If the first read observed @@ -98,12 +98,12 @@ fn test_wrc() { }); // | | #[rustfmt::skip] // |synchronizes-with | let j2 = spawn(move || { // | | - loads_value(&x, Acquire, 1); // <------------+ | + spin_until_i32(&x, Acquire, 1); // <---------+ | y.store(1, Release); // ---------------------+ |happens-before }); // | | #[rustfmt::skip] // |synchronizes-with | let j3 = spawn(move || { // | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | x.load(Relaxed) // <-----------------------------------------------+ }); @@ -129,7 +129,7 @@ fn test_message_passing() { #[rustfmt::skip] // |synchronizes-with | happens-before let j2 = spawn(move || { // | | let x = x; // avoid field capturing | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | unsafe { *x.0 } // <---------------------------------------------+ }); @@ -224,12 +224,12 @@ fn test_sync_through_rmw_and_fences() { let go = static_atomic_bool(false); let t1 = spawn(move || { - loads_bool(go, Relaxed, true); + spin_until_bool(go, Relaxed, true); rdmw(y, x, z) }); let t2 = spawn(move || { - loads_bool(go, Relaxed, true); + spin_until_bool(go, Relaxed, true); rdmw(z, x, y) }); diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs index 2c9d742a8b115..937c2a8cf282c 100644 --- a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs @@ -20,7 +20,7 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool { } /// Spins until it acquires a pre-determined value. -fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { +fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -28,7 +28,7 @@ fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { } /// Spins until it acquires a pre-determined boolean. -fn loads_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { +fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -68,11 +68,11 @@ fn test_iriw_sc_rlx() { let a = spawn(move || x.store(true, Relaxed)); let b = spawn(move || y.store(true, Relaxed)); let c = spawn(move || { - loads_bool(x, SeqCst, true); + spin_until_bool(x, SeqCst, true); y.load(SeqCst) }); let d = spawn(move || { - loads_bool(y, SeqCst, true); + spin_until_bool(y, SeqCst, true); x.load(SeqCst) }); @@ -144,7 +144,7 @@ fn test_cpp20_rwc_syncs() { }); let j2 = spawn(move || { - loads_value(&x, Relaxed, 1); + spin_until_i32(&x, Relaxed, 1); fence(SeqCst); y.load(Relaxed) }); diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs index 569a307ab064c..199f83f052860 100644 --- a/src/tools/miri/tests/pass/weak_memory/weak.rs +++ b/src/tools/miri/tests/pass/weak_memory/weak.rs @@ -24,7 +24,7 @@ fn static_atomic(val: usize) -> &'static AtomicUsize { } // Spins until it reads the given value -fn reads_value(loc: &AtomicUsize, val: usize) -> usize { +fn spin_until(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Relaxed) != val { std::hint::spin_loop(); } @@ -85,7 +85,7 @@ fn initialization_write(add_fence: bool) -> bool { }); let j2 = spawn(move || { - reads_value(wait, 1); + spin_until(wait, 1); if add_fence { fence(AcqRel); } @@ -119,12 +119,12 @@ fn faa_replaced_by_load() -> bool { let go = static_atomic(0); let t1 = spawn(move || { - reads_value(go, 1); + spin_until(go, 1); rdmw(y, x, z) }); let t2 = spawn(move || { - reads_value(go, 1); + spin_until(go, 1); rdmw(z, x, y) }); From 85c89494d9efdc90f2ac08446c00b0b78287b7a2 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 10 Jul 2025 15:10:33 -0400 Subject: [PATCH 20/24] add support for global constructors (i.e. life before main) --- src/tools/miri/src/eval.rs | 91 +++++++++++++++++++++---------- src/tools/miri/src/helpers.rs | 9 ++- src/tools/miri/src/shims/ctor.rs | 94 ++++++++++++++++++++++++++++++++ src/tools/miri/src/shims/mod.rs | 1 + src/tools/miri/src/shims/tls.rs | 2 +- 5 files changed, 165 insertions(+), 32 deletions(-) create mode 100644 src/tools/miri/src/shims/ctor.rs diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index 425a136dfa54c..d986f2830aa2e 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -11,14 +11,14 @@ use rustc_abi::ExternAbi; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_hir::def::Namespace; use rustc_hir::def_id::DefId; -use rustc_middle::ty::layout::LayoutCx; +use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx}; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_session::config::EntryFnType; use crate::concurrency::GenmcCtx; use crate::concurrency::thread::TlsAllocAction; use crate::diagnostics::report_leaks; -use crate::shims::tls; +use crate::shims::{ctor, tls}; use crate::*; #[derive(Copy, Clone, Debug)] @@ -216,9 +216,15 @@ impl Default for MiriConfig { } /// The state of the main thread. Implementation detail of `on_main_stack_empty`. -#[derive(Default, Debug)] +#[derive(Debug)] enum MainThreadState<'tcx> { - #[default] + GlobalCtors { + ctor_state: ctor::GlobalCtorState<'tcx>, + entry_id: DefId, + entry_type: MiriEntryFnType, + argc: ImmTy<'tcx>, + argv: ImmTy<'tcx>, + }, Running, TlsDtors(tls::TlsDtorsState<'tcx>), Yield { @@ -234,6 +240,15 @@ impl<'tcx> MainThreadState<'tcx> { ) -> InterpResult<'tcx, Poll<()>> { use MainThreadState::*; match self { + GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => { + match ctor_state.on_stack_empty(this)? { + Poll::Pending => {} // just keep going + Poll::Ready(()) => { + call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?; + *self = Running; + } + } + } Running => { *self = TlsDtors(Default::default()); } @@ -309,26 +324,6 @@ pub fn create_ecx<'tcx>( MiriMachine::new(config, layout_cx, genmc_ctx), ); - // Some parts of initialization require a full `InterpCx`. - MiriMachine::late_init(&mut ecx, config, { - let mut state = MainThreadState::default(); - // Cannot capture anything GC-relevant here. - Box::new(move |m| state.on_main_stack_empty(m)) - })?; - - // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. - let sentinel = - helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS); - if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) { - tcx.dcx().fatal( - "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \ - Use `cargo miri setup` to prepare a sysroot that is suitable for Miri." - ); - } - - // Setup first stack frame. - let entry_instance = ty::Instance::mono(tcx, entry_id); - // First argument is constructed later, because it's skipped for `miri_start.` // Second argument (argc): length of `config.args`. @@ -395,11 +390,51 @@ pub fn create_ecx<'tcx>( ImmTy::from_immediate(imm, layout) }; + // Some parts of initialization require a full `InterpCx`. + MiriMachine::late_init(&mut ecx, config, { + let mut state = MainThreadState::GlobalCtors { + entry_id, + entry_type, + argc, + argv, + ctor_state: ctor::GlobalCtorState::default(), + }; + + // Cannot capture anything GC-relevant here. + Box::new(move |m| state.on_main_stack_empty(m)) + })?; + + // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. + let sentinel = + helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS); + if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) { + tcx.dcx().fatal( + "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \ + Use `cargo miri setup` to prepare a sysroot that is suitable for Miri." + ); + } + + interp_ok(ecx) +} + +// Call the entry function. +fn call_main<'tcx>( + ecx: &mut MiriInterpCx<'tcx>, + entry_id: DefId, + entry_type: MiriEntryFnType, + argc: ImmTy<'tcx>, + argv: ImmTy<'tcx>, +) -> InterpResult<'tcx, ()> { + let tcx = ecx.tcx(); + + // Setup first stack frame. + let entry_instance = ty::Instance::mono(tcx, entry_id); + // Return place (in static memory so that it does not count as leak). let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; ecx.machine.main_fn_ret_place = Some(ret_place.clone()); - // Call start function. + // Call start function. match entry_type { MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => { let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| { @@ -409,7 +444,7 @@ pub fn create_ecx<'tcx>( let main_ret_ty = main_ret_ty.no_bound_vars().unwrap(); let start_instance = ty::Instance::try_resolve( tcx, - typing_env, + ecx.typing_env(), start_id, tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]), ) @@ -427,7 +462,7 @@ pub fn create_ecx<'tcx>( ExternAbi::Rust, &[ ImmTy::from_scalar( - Scalar::from_pointer(main_ptr, &ecx), + Scalar::from_pointer(main_ptr, ecx), // FIXME use a proper fn ptr type ecx.machine.layouts.const_raw_ptr, ), @@ -450,7 +485,7 @@ pub fn create_ecx<'tcx>( } } - interp_ok(ecx) + interp_ok(()) } /// Evaluates the entry function specified by `entry_id`. diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index 216b7b1e4bb38..35d3e7e647ccf 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -1235,8 +1235,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } - /// Lookup an array of immediates stored as a linker section of name `name`. - fn lookup_link_section(&mut self, name: &str) -> InterpResult<'tcx, Vec>> { + /// Lookup an array of immediates from any linker sections matching the provided predicate. + fn lookup_link_section( + &mut self, + include_name: impl Fn(&str) -> bool, + ) -> InterpResult<'tcx, Vec>> { let this = self.eval_context_mut(); let tcx = this.tcx.tcx; @@ -1247,7 +1250,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let Some(link_section) = attrs.link_section else { return interp_ok(()); }; - if link_section.as_str() == name { + if include_name(link_section.as_str()) { let instance = ty::Instance::mono(tcx, def_id); let const_val = this.eval_global(instance).unwrap_or_else(|err| { panic!( diff --git a/src/tools/miri/src/shims/ctor.rs b/src/tools/miri/src/shims/ctor.rs new file mode 100644 index 0000000000000..75f564989bc63 --- /dev/null +++ b/src/tools/miri/src/shims/ctor.rs @@ -0,0 +1,94 @@ +//! Implement global constructors. + +use std::task::Poll; + +use rustc_abi::ExternAbi; +use rustc_target::spec::BinaryFormat; + +use crate::*; + +#[derive(Debug, Default)] +pub struct GlobalCtorState<'tcx>(GlobalCtorStatePriv<'tcx>); + +#[derive(Debug, Default)] +enum GlobalCtorStatePriv<'tcx> { + #[default] + Init, + /// The list of constructor functions that we still have to call. + Ctors(Vec>), + Done, +} + +impl<'tcx> GlobalCtorState<'tcx> { + pub fn on_stack_empty( + &mut self, + this: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, Poll<()>> { + use GlobalCtorStatePriv::*; + let new_state = 'new_state: { + match &mut self.0 { + Init => { + let this = this.eval_context_mut(); + + // Lookup constructors from the relevant magic link section. + let ctors = match this.tcx.sess.target.binary_format { + // Read the CRT library section on Windows. + BinaryFormat::Coff => + this.lookup_link_section(|section| section == ".CRT$XCU")?, + + // Read the `__mod_init_func` section on macOS. + BinaryFormat::MachO => + this.lookup_link_section(|section| { + let mut parts = section.splitn(3, ','); + let (segment_name, section_name, section_type) = + (parts.next(), parts.next(), parts.next()); + + segment_name == Some("__DATA") + && section_name == Some("__mod_init_func") + // The `mod_init_funcs` directive ensures that the `S_MOD_INIT_FUNC_POINTERS` flag + // is set on the section, but it is not strictly required. + && matches!(section_type, None | Some("mod_init_funcs")) + })?, + + // Read the standard `.init_array` section on platforms that use ELF, or WASM, + // which supports the same linker directive. + // FIXME: Add support for `.init_array.N`. + BinaryFormat::Elf | BinaryFormat::Wasm => + this.lookup_link_section(|section| section == ".init_array")?, + + // Other platforms have no global ctor support. + _ => break 'new_state Done, + }; + + break 'new_state Ctors(ctors); + } + Ctors(ctors) => { + if let Some(ctor) = ctors.pop() { + let this = this.eval_context_mut(); + + let ctor = ctor.to_scalar().to_pointer(this)?; + let thread_callback = this.get_ptr_fn(ctor)?.as_instance()?; + + // The signature of this function is `unsafe extern "C" fn()`. + this.call_function( + thread_callback, + ExternAbi::C { unwind: false }, + &[], + None, + ReturnContinuation::Stop { cleanup: true }, + )?; + + return interp_ok(Poll::Pending); // we stay in this state (but `ctors` got shorter) + } + + // No more constructors to run. + break 'new_state Done; + } + Done => return interp_ok(Poll::Ready(())), + } + }; + + self.0 = new_state; + interp_ok(Poll::Pending) + } +} diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index b08ab101e9479..0faf298bfaa32 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -11,6 +11,7 @@ mod wasi; mod windows; mod x86; +pub mod ctor; pub mod env; pub mod extern_static; pub mod foreign_items; diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index 7182637437a16..1200029692dc3 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -302,7 +302,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Windows has a special magic linker section that is run on certain events. // We don't support most of that, but just enough to make thread-local dtors in `std` work. - interp_ok(this.lookup_link_section(".CRT$XLB")?) + interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?) } fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> { From d060859376994b32dad84e5bb7f507fbb1dcb89a Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Thu, 10 Jul 2025 16:37:54 -0400 Subject: [PATCH 21/24] add test for global constructors --- src/tools/miri/tests/pass/shims/ctor.rs | 43 +++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/tools/miri/tests/pass/shims/ctor.rs diff --git a/src/tools/miri/tests/pass/shims/ctor.rs b/src/tools/miri/tests/pass/shims/ctor.rs new file mode 100644 index 0000000000000..c832e3f82a35b --- /dev/null +++ b/src/tools/miri/tests/pass/shims/ctor.rs @@ -0,0 +1,43 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +static COUNT: AtomicUsize = AtomicUsize::new(0); + +unsafe extern "C" fn ctor() { + COUNT.fetch_add(1, Ordering::Relaxed); +} + +macro_rules! ctor { + ($ident:ident = $ctor:ident) => { + #[cfg_attr( + all(any( + target_os = "linux", + target_os = "android", + target_os = "dragonfly", + target_os = "freebsd", + target_os = "haiku", + target_os = "illumos", + target_os = "netbsd", + target_os = "openbsd", + target_os = "solaris", + target_os = "none", + target_family = "wasm", + )), + link_section = ".init_array" + )] + #[cfg_attr(windows, link_section = ".CRT$XCU")] + #[cfg_attr( + any(target_os = "macos", target_os = "ios"), + link_section = "__DATA,__mod_init_func" + )] + #[used] + static $ident: unsafe extern "C" fn() = $ctor; + }; +} + +ctor! { CTOR1 = ctor } +ctor! { CTOR2 = ctor } +ctor! { CTOR3 = ctor } + +fn main() { + assert_eq!(COUNT.load(Ordering::Relaxed), 3, "ctors did not run"); +} From 06f0dca10600390f1e4d19fdefa3453d14238048 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Mon, 7 Jul 2025 18:37:47 +0200 Subject: [PATCH 22/24] hook up native-lib bits --- src/tools/miri/src/lib.rs | 1 + src/tools/miri/src/shims/native_lib/mod.rs | 123 +++++++++++++++--- .../miri/src/shims/native_lib/trace/parent.rs | 23 +++- .../native-lib/fail/tracing/partial_init.rs | 25 ++++ .../fail/tracing/partial_init.stderr | 39 ++++++ .../fail/tracing/unexposed_reachable_alloc.rs | 29 +++++ .../tracing/unexposed_reachable_alloc.stderr | 39 ++++++ ....stderr => ptr_read_access.notrace.stderr} | 0 ....stdout => ptr_read_access.notrace.stdout} | 0 .../tests/native-lib/pass/ptr_read_access.rs | 4 + .../pass/ptr_read_access.trace.stderr | 19 +++ .../pass/ptr_read_access.trace.stdout | 1 + ...stderr => ptr_write_access.notrace.stderr} | 0 .../tests/native-lib/pass/ptr_write_access.rs | 3 + .../pass/ptr_write_access.trace.stderr | 19 +++ .../miri/tests/native-lib/ptr_read_access.c | 6 + .../miri/tests/native-lib/ptr_write_access.c | 8 ++ 17 files changed, 314 insertions(+), 25 deletions(-) create mode 100644 src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs create mode 100644 src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr create mode 100644 src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs create mode 100644 src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr rename src/tools/miri/tests/native-lib/pass/{ptr_read_access.stderr => ptr_read_access.notrace.stderr} (100%) rename src/tools/miri/tests/native-lib/pass/{ptr_read_access.stdout => ptr_read_access.notrace.stdout} (100%) create mode 100644 src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr create mode 100644 src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout rename src/tools/miri/tests/native-lib/pass/{ptr_write_access.stderr => ptr_write_access.notrace.stderr} (100%) create mode 100644 src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 374f63a16604f..2010b4dae185c 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -15,6 +15,7 @@ #![feature(unqualified_local_imports)] #![feature(derive_coerce_pointee)] #![feature(arbitrary_self_types)] +#![feature(iter_advance_by)] // Configure clippy and other lints #![allow( clippy::collapsible_else_if, diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs index a2a23c4813f00..fb7b1df41a4cf 100644 --- a/src/tools/miri/src/shims/native_lib/mod.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -34,18 +34,25 @@ pub struct MemEvents { /// A single memory access. #[allow(dead_code)] #[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AccessEvent { - /// A read may have occurred on this memory range. - /// Some instructions *may* read memory without *always* doing that, - /// so this can be an over-approximation. - /// The range info, however, is reliable if the access did happen. + /// A read occurred on this memory range. Read(AccessRange), - /// A read may have occurred on this memory range. + /// A write may have occurred on this memory range. /// Some instructions *may* write memory without *always* doing that, /// so this can be an over-approximation. /// The range info, however, is reliable if the access did happen. - Write(AccessRange), + /// If the second field is true, the access definitely happened. + Write(AccessRange, bool), +} + +impl AccessEvent { + fn get_range(&self) -> AccessRange { + match self { + AccessEvent::Read(access_range) => access_range.clone(), + AccessEvent::Write(access_range, _) => access_range.clone(), + } + } } /// The memory touched by a given access. @@ -59,6 +66,12 @@ pub struct AccessRange { pub size: usize, } +impl AccessRange { + fn end(&self) -> usize { + self.addr.strict_add(self.size) + } +} + impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Call native host function and return the output as an immediate. @@ -196,6 +209,73 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } None } + + /// Applies the `events` to Miri's internal state. The event vector must be + /// ordered sequentially by when the accesses happened, and the sizes are + /// assumed to be exact. + fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + for evt in events.acc_events { + let evt_rg = evt.get_range(); + // LLVM at least permits vectorising accesses to adjacent allocations, + // so we cannot assume 1 access = 1 allocation. :( + let mut rg = evt_rg.addr..evt_rg.end(); + while let Some(curr) = rg.next() { + let Some(alloc_id) = this.alloc_id_from_addr( + curr.to_u64(), + rg.len().try_into().unwrap(), + /* only_exposed_allocations */ true, + ) else { + throw_ub_format!("Foreign code did an out-of-bounds access!") + }; + let alloc = this.get_alloc_raw(alloc_id)?; + // The logical and physical address of the allocation coincide, so we can use + // this instead of `addr_from_alloc_id`. + let alloc_addr = alloc.get_bytes_unchecked_raw().addr(); + + // Determine the range inside the allocation that this access covers. This range is + // in terms of offsets from the start of `alloc`. The start of the overlap range + // will be `curr`; the end will be the minimum of the end of the allocation and the + // end of the access' range. + let overlap = curr.strict_sub(alloc_addr) + ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr)); + // Skip forward however many bytes of the access are contained in the current + // allocation, subtracting 1 since the overlap range includes the current addr + // that was already popped off of the range. + rg.advance_by(overlap.len().strict_sub(1)).unwrap(); + + match evt { + AccessEvent::Read(_) => { + // FIXME: ProvenanceMap should have something like get_range(). + let p_map = alloc.provenance(); + for idx in overlap { + // If a provenance was read by the foreign code, expose it. + if let Some(prov) = p_map.get(Size::from_bytes(idx), this) { + this.expose_provenance(prov)?; + } + } + } + AccessEvent::Write(_, certain) => { + // Sometimes we aren't certain if a write happened, in which case we + // only initialise that data if the allocation is mutable. + if certain || alloc.mutability.is_mut() { + let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?; + alloc.process_native_write( + &cx.tcx, + Some(AllocRange { + start: Size::from_bytes(overlap.start), + size: Size::from_bytes(overlap.len()), + }), + ) + } + } + } + } + } + + interp_ok(()) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -221,6 +301,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } }; + // Do we have ptrace? + let tracing = trace::Supervisor::is_enabled(); + // Get the function arguments, and convert them to `libffi`-compatible form. let mut libffi_args = Vec::::with_capacity(args.len()); for arg in args.iter() { @@ -240,9 +323,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // The first time this happens, print a warning. if !this.machine.native_call_mem_warned.replace(true) { // Newly set, so first time we get here. - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { - tracing: self::trace::Supervisor::is_enabled(), - }); + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing }); } this.expose_provenance(prov)?; @@ -269,15 +350,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // be read by FFI. The `black_box` is defensive programming as LLVM likes // to (incorrectly) optimize away ptr2int casts whose result is unused. std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance()); - // Expose all provenances in this allocation, since the native code can do $whatever. - for prov in alloc.provenance().provenances() { - this.expose_provenance(prov)?; + + if !tracing { + // Expose all provenances in this allocation, since the native code can do $whatever. + // Can be skipped when tracing; in that case we'll expose just the actually-read parts later. + for prov in alloc.provenance().provenances() { + this.expose_provenance(prov)?; + } } // Prepare for possible write from native code if mutable. if info.mutbl.is_mut() { let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?; - alloc.process_native_write(&cx.tcx, None); + // These writes could initialize everything and wreck havoc with the pointers. + // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written. + if !tracing { + alloc.process_native_write(&cx.tcx, None); + } // Also expose *mutable* provenance for the interpreter-level allocation. std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance()); } @@ -289,10 +378,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let (ret, maybe_memevents) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; - if cfg!(target_os = "linux") - && let Some(events) = maybe_memevents - { - trace!("Registered FFI events:\n{events:#0x?}"); + if tracing { + this.tracing_apply_accesses(maybe_memevents.unwrap())?; } this.write_immediate(*ret, dest)?; diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index fc4047eec51d3..83f6c7a13fcfe 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -58,11 +58,11 @@ impl ArchIndependentRegs for libc::user_regs_struct { #[rustfmt::skip] impl ArchIndependentRegs for libc::user_regs_struct { #[inline] - fn ip(&self) -> usize { self.eip.try_into().unwrap() } + fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() } #[inline] - fn set_ip(&mut self, ip: usize) { self.eip = ip.try_into().unwrap() } + fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() } #[inline] - fn set_sp(&mut self, sp: usize) { self.esp = sp.try_into().unwrap() } + fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() } } /// A unified event representing something happening on the child process. Wraps @@ -386,7 +386,17 @@ fn capstone_find_events( acc_events.push(AccessEvent::Read(push.clone())); } if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); + // FIXME: This could be made certain; either determine all cases where + // only reads happen, or have an intermediate mempr_* function to first + // map the page(s) as readonly and check if a segfault occurred. + + // Per https://docs.rs/iced-x86/latest/iced_x86/enum.OpAccess.html, + // we know that the possible access types are Read, CondRead, Write, + // CondWrite, ReadWrite, and ReadCondWrite. Since we got a segfault + // we know some kind of access happened so Cond{Read, Write}s are + // certain reads and writes; the only uncertainty is with an RW op + // as it might be a ReadCondWrite with the write condition unmet. + acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable())); } return true; @@ -442,8 +452,7 @@ fn handle_segfault( // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); - // All x86, ARM, etc. instructions only have at most one memory operand - // (thankfully!) + // All x86 instructions only have at most one memory operand (thankfully!) // SAFETY: si_addr is safe to call. let addr = unsafe { siginfo.si_addr().addr() }; let page_addr = addr.strict_sub(addr.strict_rem(page_size)); @@ -490,7 +499,7 @@ fn handle_segfault( ptrace::write( pid, (&raw const PAGE_ADDR).cast_mut().cast(), - libc::c_long::try_from(page_addr).unwrap(), + libc::c_long::try_from(page_addr.cast_signed()).unwrap(), ) .unwrap(); diff --git a/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs new file mode 100644 index 0000000000000..e267f82e215b5 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs @@ -0,0 +1,25 @@ +//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@compile-flags: -Zmiri-native-lib-enable-tracing + +extern "C" { + fn init_n(n: i32, ptr: *mut u8); +} + +fn main() { + partial_init(); +} + +// Initialise the first 2 elements of the slice from native code, and check +// that the 3rd is correctly deemed uninit. +fn partial_init() { + let mut slice = std::mem::MaybeUninit::<[u8; 3]>::uninit(); + let slice_ptr = slice.as_mut_ptr().cast::(); + unsafe { + // Initialize the first two elements. + init_n(2, slice_ptr); + assert!(*slice_ptr == 0); + assert!(*slice_ptr.offset(1) == 0); + // Reading the third is UB! + let _val = *slice_ptr.offset(2); //~ ERROR: Undefined Behavior: using uninitialized data + } +} diff --git a/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr new file mode 100644 index 0000000000000..84fd913b5e5b3 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr @@ -0,0 +1,39 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | init_n(2, slice_ptr); + | ^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | partial_init(); + | ^^^^^^^^^^^^^^ + +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | let _val = *slice_ptr.offset(2); + | ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | partial_init(); + | ^^^^^^^^^^^^^^ + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error; 1 warning emitted + diff --git a/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs new file mode 100644 index 0000000000000..b78c29d98dbb6 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs @@ -0,0 +1,29 @@ +//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@compile-flags: -Zmiri-permissive-provenance -Zmiri-native-lib-enable-tracing + +extern "C" { + fn do_one_deref(ptr: *const *const *const i32) -> usize; +} + +fn main() { + unexposed_reachable_alloc(); +} + +// Expose 2 pointers by virtue of doing a native read and assert that the 3rd in +// the chain remains properly unexposed. +fn unexposed_reachable_alloc() { + let inner = 42; + let intermediate_a = &raw const inner; + let intermediate_b = &raw const intermediate_a; + let exposed = &raw const intermediate_b; + // Discard the return value; it's just there so the access in C doesn't get optimised away. + unsafe { do_one_deref(exposed) }; + // Native read should have exposed the address of intermediate_b... + let valid: *const i32 = std::ptr::with_exposed_provenance(intermediate_b.addr()); + // but not of intermediate_a. + let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate_a.addr()); + unsafe { + let _ok = *valid; + let _not_ok = *invalid; //~ ERROR: Undefined Behavior: memory access failed: attempting to access + } +} diff --git a/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr new file mode 100644 index 0000000000000..2d34dac1b3ff3 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr @@ -0,0 +1,39 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unsafe { do_one_deref(exposed) }; + | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unexposed_reachable_alloc(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: Undefined Behavior: memory access failed: attempting to access 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | let _not_ok = *invalid; + | ^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unexposed_reachable_alloc(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error; 1 warning emitted + diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stderr similarity index 100% rename from src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr rename to src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stderr diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stdout b/src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stdout similarity index 100% rename from src/tools/miri/tests/native-lib/pass/ptr_read_access.stdout rename to src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stdout diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs index 4c85213536785..4f3c37f00c1d7 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs @@ -1,3 +1,7 @@ +//@revisions: trace notrace +//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing + fn main() { test_access_pointer(); test_access_simple(); diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr new file mode 100644 index 0000000000000..c2a4508b7fcc7 --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr @@ -0,0 +1,19 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/pass/ptr_read_access.rs:LL:CC + | +LL | unsafe { print_pointer(&x) }; + | ^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `test_access_pointer` at tests/native-lib/pass/ptr_read_access.rs:LL:CC +note: inside `main` + --> tests/native-lib/pass/ptr_read_access.rs:LL:CC + | +LL | test_access_pointer(); + | ^^^^^^^^^^^^^^^^^^^^^ + diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout new file mode 100644 index 0000000000000..1a8799abfc93e --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout @@ -0,0 +1 @@ +printing pointer dereference from C: 42 diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.notrace.stderr similarity index 100% rename from src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr rename to src/tools/miri/tests/native-lib/pass/ptr_write_access.notrace.stderr diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs b/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs index 86a9c97f4cecc..57def78b0ab17 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs @@ -1,3 +1,6 @@ +//@revisions: trace notrace +//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing //@compile-flags: -Zmiri-permissive-provenance #![feature(box_as_ptr)] diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr new file mode 100644 index 0000000000000..dbf021b15bec4 --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr @@ -0,0 +1,19 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/pass/ptr_write_access.rs:LL:CC + | +LL | unsafe { increment_int(&mut x) }; + | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `test_increment_int` at tests/native-lib/pass/ptr_write_access.rs:LL:CC +note: inside `main` + --> tests/native-lib/pass/ptr_write_access.rs:LL:CC + | +LL | test_increment_int(); + | ^^^^^^^^^^^^^^^^^^^^ + diff --git a/src/tools/miri/tests/native-lib/ptr_read_access.c b/src/tools/miri/tests/native-lib/ptr_read_access.c index b89126d3d7c88..021eb6adca4f6 100644 --- a/src/tools/miri/tests/native-lib/ptr_read_access.c +++ b/src/tools/miri/tests/native-lib/ptr_read_access.c @@ -49,3 +49,9 @@ typedef struct Static { EXPORT int32_t access_static(const Static *s_ptr) { return s_ptr->recurse->recurse->value; } + +/* Test: unexposed_reachable_alloc */ + +EXPORT uintptr_t do_one_deref(const int32_t ***ptr) { + return (uintptr_t)*ptr; +} diff --git a/src/tools/miri/tests/native-lib/ptr_write_access.c b/src/tools/miri/tests/native-lib/ptr_write_access.c index fd8b005499c76..5260d0b365175 100644 --- a/src/tools/miri/tests/native-lib/ptr_write_access.c +++ b/src/tools/miri/tests/native-lib/ptr_write_access.c @@ -107,3 +107,11 @@ EXPORT void set_shared_mem(int32_t** ptr) { EXPORT void init_ptr_stored_in_shared_mem(int32_t val) { **shared_place = val; } + +/* Test: partial_init */ + +EXPORT void init_n(int32_t n, char* ptr) { + for (int i=0; i Date: Wed, 16 Jul 2025 03:53:20 -0400 Subject: [PATCH 23/24] update comment to reference legacy `.ctors` section Co-authored-by: Ralf Jung --- src/tools/miri/src/shims/ctor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/src/shims/ctor.rs b/src/tools/miri/src/shims/ctor.rs index 75f564989bc63..367c2b1608b49 100644 --- a/src/tools/miri/src/shims/ctor.rs +++ b/src/tools/miri/src/shims/ctor.rs @@ -52,7 +52,7 @@ impl<'tcx> GlobalCtorState<'tcx> { // Read the standard `.init_array` section on platforms that use ELF, or WASM, // which supports the same linker directive. - // FIXME: Add support for `.init_array.N`. + // FIXME: Add support for `.init_array.N` and `.ctors`? BinaryFormat::Elf | BinaryFormat::Wasm => this.lookup_link_section(|section| section == ".init_array")?, From a7818abc141a072ff069ed6c43ebbe56eade9c23 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 16 Jul 2025 11:01:44 +0200 Subject: [PATCH 24/24] minor tweaks and comments --- src/tools/miri/src/concurrency/thread.rs | 2 + src/tools/miri/src/eval.rs | 41 ++++++++++--------- .../src/shims/{ctor.rs => global_ctor.rs} | 8 +++- src/tools/miri/src/shims/mod.rs | 2 +- .../miri/tests/pass/alloc-access-tracking.rs | 4 +- src/tools/miri/tests/pass/shims/ctor.rs | 3 ++ 6 files changed, 35 insertions(+), 25 deletions(-) rename src/tools/miri/src/shims/{ctor.rs => global_ctor.rs} (87%) diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs index abfee0ee8746e..878afdf251731 100644 --- a/src/tools/miri/src/concurrency/thread.rs +++ b/src/tools/miri/src/concurrency/thread.rs @@ -677,6 +677,8 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> { fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> { let this = self.eval_context_mut(); // Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling. + // FIXME(GenMC): Thread-local destructors *are* user code, so this is odd. Also now that we + // support pre-main constructors, it can get called there as well. if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() { let thread_id = this.active_thread(); genmc_ctx.handle_thread_stack_empty(thread_id); diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index d986f2830aa2e..be6404f64e8f8 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -18,7 +18,7 @@ use rustc_session::config::EntryFnType; use crate::concurrency::GenmcCtx; use crate::concurrency::thread::TlsAllocAction; use crate::diagnostics::report_leaks; -use crate::shims::{ctor, tls}; +use crate::shims::{global_ctor, tls}; use crate::*; #[derive(Copy, Clone, Debug)] @@ -219,9 +219,11 @@ impl Default for MiriConfig { #[derive(Debug)] enum MainThreadState<'tcx> { GlobalCtors { - ctor_state: ctor::GlobalCtorState<'tcx>, + ctor_state: global_ctor::GlobalCtorState<'tcx>, + /// The main function to call. entry_id: DefId, entry_type: MiriEntryFnType, + /// Arguments passed to `main`. argc: ImmTy<'tcx>, argv: ImmTy<'tcx>, }, @@ -324,12 +326,19 @@ pub fn create_ecx<'tcx>( MiriMachine::new(config, layout_cx, genmc_ctx), ); - // First argument is constructed later, because it's skipped for `miri_start.` + // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. + let sentinel = + helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS); + if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) { + tcx.dcx().fatal( + "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \ + Use `cargo miri setup` to prepare a sysroot that is suitable for Miri." + ); + } - // Second argument (argc): length of `config.args`. + // Compute argc and argv from `config.args`. let argc = ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize); - // Third argument (`argv`): created from `config.args`. let argv = { // Put each argument in memory, collect pointers. let mut argvs = Vec::>::with_capacity(config.args.len()); @@ -354,7 +363,7 @@ pub fn create_ecx<'tcx>( ecx.write_immediate(arg, &place)?; } ecx.mark_immutable(&argvs_place); - // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`. + // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`, and for the GC to see them. { let argc_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; @@ -369,7 +378,7 @@ pub fn create_ecx<'tcx>( ecx.machine.argv = Some(argv_place.ptr()); } // Store command line as UTF-16 for Windows `GetCommandLineW`. - { + if tcx.sess.target.os == "windows" { // Construct a command string with all the arguments. let cmd_utf16: Vec = args_to_utf16_command_string(config.args.iter()); @@ -392,28 +401,20 @@ pub fn create_ecx<'tcx>( // Some parts of initialization require a full `InterpCx`. MiriMachine::late_init(&mut ecx, config, { - let mut state = MainThreadState::GlobalCtors { + let mut main_thread_state = MainThreadState::GlobalCtors { entry_id, entry_type, argc, argv, - ctor_state: ctor::GlobalCtorState::default(), + ctor_state: global_ctor::GlobalCtorState::default(), }; // Cannot capture anything GC-relevant here. - Box::new(move |m| state.on_main_stack_empty(m)) + // `argc` and `argv` *are* GC_relevant, but they also get stored in `machine.argc` and + // `machine.argv` so we are good. + Box::new(move |m| main_thread_state.on_main_stack_empty(m)) })?; - // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. - let sentinel = - helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS); - if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) { - tcx.dcx().fatal( - "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \ - Use `cargo miri setup` to prepare a sysroot that is suitable for Miri." - ); - } - interp_ok(ecx) } diff --git a/src/tools/miri/src/shims/ctor.rs b/src/tools/miri/src/shims/global_ctor.rs similarity index 87% rename from src/tools/miri/src/shims/ctor.rs rename to src/tools/miri/src/shims/global_ctor.rs index 367c2b1608b49..c56251bbe63ae 100644 --- a/src/tools/miri/src/shims/ctor.rs +++ b/src/tools/miri/src/shims/global_ctor.rs @@ -45,8 +45,12 @@ impl<'tcx> GlobalCtorState<'tcx> { segment_name == Some("__DATA") && section_name == Some("__mod_init_func") - // The `mod_init_funcs` directive ensures that the `S_MOD_INIT_FUNC_POINTERS` flag - // is set on the section, but it is not strictly required. + // The `mod_init_funcs` directive ensures that the + // `S_MOD_INIT_FUNC_POINTERS` flag is set on the section. LLVM + // adds this automatically so we currently do not require it. + // FIXME: is this guaranteed LLVM behavior? If not, we shouldn't + // implicitly add it here. Also see + // . && matches!(section_type, None | Some("mod_init_funcs")) })?, diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index 0faf298bfaa32..75540f6f15000 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -11,10 +11,10 @@ mod wasi; mod windows; mod x86; -pub mod ctor; pub mod env; pub mod extern_static; pub mod foreign_items; +pub mod global_ctor; pub mod io_error; pub mod os_str; pub mod panic; diff --git a/src/tools/miri/tests/pass/alloc-access-tracking.rs b/src/tools/miri/tests/pass/alloc-access-tracking.rs index 0e88951dc43f9..9eba0ca171bc6 100644 --- a/src/tools/miri/tests/pass/alloc-access-tracking.rs +++ b/src/tools/miri/tests/pass/alloc-access-tracking.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -//@compile-flags: -Zmiri-track-alloc-id=20 -Zmiri-track-alloc-accesses -Cpanic=abort -//@normalize-stderr-test: "id 20" -> "id $$ALLOC" +//@compile-flags: -Zmiri-track-alloc-id=19 -Zmiri-track-alloc-accesses -Cpanic=abort +//@normalize-stderr-test: "id 19" -> "id $$ALLOC" //@only-target: linux # alloc IDs differ between OSes (due to extern static allocations) extern "Rust" { diff --git a/src/tools/miri/tests/pass/shims/ctor.rs b/src/tools/miri/tests/pass/shims/ctor.rs index c832e3f82a35b..b997d2386b82c 100644 --- a/src/tools/miri/tests/pass/shims/ctor.rs +++ b/src/tools/miri/tests/pass/shims/ctor.rs @@ -6,6 +6,7 @@ unsafe extern "C" fn ctor() { COUNT.fetch_add(1, Ordering::Relaxed); } +#[rustfmt::skip] macro_rules! ctor { ($ident:ident = $ctor:ident) => { #[cfg_attr( @@ -27,6 +28,8 @@ macro_rules! ctor { #[cfg_attr(windows, link_section = ".CRT$XCU")] #[cfg_attr( any(target_os = "macos", target_os = "ios"), + // We do not set the `mod_init_funcs` flag here since ctor/inventory also do not do + // that. See . link_section = "__DATA,__mod_init_func" )] #[used]