diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md
index b05acff72b50a..7816ce1ac5619 100644
--- a/src/tools/miri/README.md
+++ b/src/tools/miri/README.md
@@ -342,9 +342,9 @@ environment variable. We first document the most relevant and most commonly used
   is enabled (the default), this is also used to emulate system entropy. The default seed is 0.
   You can increase test coverage by running Miri multiple times with different seeds.
 * `-Zmiri-strict-provenance` enables [strict
-  provenance](https://github.com/rust-lang/rust/issues/95228) checking in Miri. This means that
-  casting an integer to a pointer will stop execution because the provenance of the pointer
-  cannot be determined.
+  provenance](https://doc.rust-lang.org/nightly/std/ptr/index.html#strict-provenance) checking in
+  Miri. This means that casting an integer to a pointer will stop execution because the provenance
+  of the pointer cannot be determined.
 * `-Zmiri-symbolic-alignment-check` makes the alignment check more strict. By default, alignment is
   checked by casting the pointer to an integer, and making sure that is a multiple of the alignment.
   This can lead to cases where a program passes the alignment check by pure chance, because things
diff --git a/src/tools/miri/miri-script/Cargo.toml b/src/tools/miri/miri-script/Cargo.toml
index 462a9cc62bff0..9240788d6bce1 100644
--- a/src/tools/miri/miri-script/Cargo.toml
+++ b/src/tools/miri/miri-script/Cargo.toml
@@ -7,6 +7,7 @@ repository = "https://github.com/rust-lang/miri"
 version = "0.1.0"
 default-run = "miri-script"
 edition = "2024"
+rust-version = "1.85"
 
 [workspace]
 # We make this a workspace root so that cargo does not go looking in ../Cargo.toml for the workspace root.
diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs
index e948beef00447..e6ebdf54e3851 100644
--- a/src/tools/miri/miri-script/src/commands.rs
+++ b/src/tools/miri/miri-script/src/commands.rs
@@ -702,7 +702,6 @@ impl Command {
         let mut early_flags = Vec::<OsString>::new();
 
         // In `dep` mode, the target is already passed via `MIRI_TEST_TARGET`
-        #[expect(clippy::collapsible_if)] // we need to wait until this is stable
         if !dep {
             if let Some(target) = &target {
                 early_flags.push("--target".into());
@@ -735,7 +734,6 @@ impl Command {
         // Add Miri flags
         let mut cmd = cmd.args(&miri_flags).args(&early_flags).args(&flags);
         // For `--dep` we also need to set the target in the env var.
-        #[expect(clippy::collapsible_if)] // we need to wait until this is stable
         if dep {
             if let Some(target) = &target {
                 cmd = cmd.env("MIRI_TEST_TARGET", target);
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 0130cf13a45f7..67f27e7aa2c71 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-733b47ea4b1b86216f14ef56e49440c33933f230
+7f2065a4bae1faed5bab928c670964eafbf43b55
diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs
index a7bb9b4da7580..7b2f1a3eebf3d 100644
--- a/src/tools/miri/src/alloc/isolated_alloc.rs
+++ b/src/tools/miri/src/alloc/isolated_alloc.rs
@@ -302,23 +302,20 @@ impl IsolatedAlloc {
         }
     }
 
-    /// Returns a vector of page addresses managed by the allocator.
-    pub fn pages(&self) -> Vec<usize> {
-        let mut pages: Vec<usize> =
-            self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect();
-        for (ptr, size) in self.huge_ptrs.iter() {
-            for i in 0..size / self.page_size {
-                pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size));
-            }
-        }
-        pages
+    /// Returns a list of page addresses managed by the allocator.
+    pub fn pages(&self) -> impl Iterator<Item = usize> {
+        let pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get());
+        pages.chain(self.huge_ptrs.iter().flat_map(|(ptr, size)| {
+            (0..size / self.page_size)
+                .map(|i| ptr.expose_provenance().get().strict_add(i * self.page_size))
+        }))
     }
 
     /// Protects all owned memory as `PROT_NONE`, preventing accesses.
     ///
     /// SAFETY: Accessing memory after this point will result in a segfault
     /// unless it is first unprotected.
-    pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
+    pub unsafe fn start_ffi(&mut self) -> Result<(), nix::errno::Errno> {
         let prot = mman::ProtFlags::PROT_NONE;
         unsafe { self.mprotect(prot) }
     }
@@ -326,7 +323,7 @@ impl IsolatedAlloc {
     /// Deprotects all owned memory by setting it to RW. Erroring here is very
     /// likely unrecoverable, so it may panic if applying those permissions
     /// fails.
-    pub fn unprep_ffi(&mut self) {
+    pub fn end_ffi(&mut self) {
         let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
         unsafe {
             self.mprotect(prot).unwrap();
diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index e3b579a4ac61d..61cebedf08104 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -233,8 +233,6 @@ impl rustc_driver::Callbacks for MiriCompilerCalls {
         } else {
             let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None)
                 .unwrap_or_else(|| {
-                    //#[cfg(target_os = "linux")]
-                    //miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE);
                     tcx.dcx().abort_if_errors();
                     rustc_driver::EXIT_FAILURE
                 });
@@ -337,6 +335,9 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls {
 fn exit(exit_code: i32) -> ! {
     // Drop the tracing guard before exiting, so tracing calls are flushed correctly.
     deinit_loggers();
+    // Make sure the supervisor knows about the exit code.
+    #[cfg(target_os = "linux")]
+    miri::native_lib::register_retcode_sv(exit_code);
     std::process::exit(exit_code);
 }
 
@@ -355,6 +356,11 @@ fn run_compiler_and_exit(
     args: &[String],
     callbacks: &mut (dyn rustc_driver::Callbacks + Send),
 ) -> ! {
+    // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
+    // MIRI_BE_RUSTC is set. We do this late so that when `native_lib::init_sv` is called,
+    // there are no other threads.
+    rustc_driver::install_ctrlc_handler();
+
     // Invoke compiler, catch any unwinding panics and handle return code.
     let exit_code =
         rustc_driver::catch_with_exit_code(move || rustc_driver::run_compiler(args, callbacks));
@@ -439,10 +445,6 @@ fn main() {
     let args = rustc_driver::catch_fatal_errors(|| rustc_driver::args::raw_args(&early_dcx))
         .unwrap_or_else(|_| std::process::exit(rustc_driver::EXIT_FAILURE));
 
-    // Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
-    // MIRI_BE_RUSTC is set.
-    rustc_driver::install_ctrlc_handler();
-
     // If the environment asks us to actually be rustc, then do that.
     if let Some(crate_kind) = env::var_os("MIRI_BE_RUSTC") {
         // Earliest rustc setup.
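At its core, the renamed `start_ffi`/`end_ffi` pair above is an `mprotect` round-trip over the allocator's pages: everything becomes `PROT_NONE` for the duration of the native call, then readable/writable again. A minimal standalone sketch of that pattern follows (a hypothetical example using the `libc` crate directly, with error handling reduced to asserts; Miri's actual code goes through `nix`'s wrappers and operates on all `IsolatedAlloc` pages at once):

```rust
fn main() {
    unsafe {
        let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize;
        // One page of RW memory, standing in for an interpreter-owned page.
        let page = libc::mmap(
            std::ptr::null_mut(),
            page_size,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(page, libc::MAP_FAILED);

        // "start_ffi": revoke all access, so any touch from native code
        // faults and can be observed by the ptrace supervisor.
        assert_eq!(libc::mprotect(page, page_size, libc::PROT_NONE), 0);

        // ... the foreign call would run here, under supervision ...

        // "end_ffi": restore RW so interpretation can continue.
        assert_eq!(libc::mprotect(page, page_size, libc::PROT_READ | libc::PROT_WRITE), 0);
        libc::munmap(page, page_size);
    }
}
```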
@@ -750,15 +752,15 @@ fn main() {
     debug!("rustc arguments: {:?}", rustc_args);
     debug!("crate arguments: {:?}", miri_config.args);
-    #[cfg(target_os = "linux")]
     if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing {
-        // FIXME: This should display a diagnostic / warning on error
-        // SAFETY: If any other threads exist at this point (namely for the ctrlc
-        // handler), they will not interact with anything on the main rustc/Miri
-        // thread in an async-signal-unsafe way such as by accessing shared
-        // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which
-        // are async-signal-safe, as is accessing atomics
-        //let _ = unsafe { miri::native_lib::init_sv() };
+        // SAFETY: No other threads are running
+        #[cfg(target_os = "linux")]
+        if unsafe { miri::native_lib::init_sv() }.is_err() {
+            eprintln!(
+                "warning: The native-lib tracer could not be started. Is this an x86 Linux system, and does Miri have permissions to ptrace?\n\
+                Falling back to non-tracing native-lib mode."
+            );
+        }
     }
     run_compiler_and_exit(
         &rustc_args,
diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs
index 36c61053a3205..ec6c2c60ca9c6 100644
--- a/src/tools/miri/src/borrow_tracker/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/mod.rs
@@ -260,6 +260,7 @@ impl GlobalStateInner {
         kind: MemoryKind,
         machine: &MiriMachine<'_>,
     ) -> AllocState {
+        let _span = enter_trace_span!(borrow_tracker::new_allocation, ?id, ?alloc_size, ?kind);
         match self.borrow_tracker_method {
             BorrowTrackerMethod::StackedBorrows =>
                 AllocState::StackedBorrows(Box::new(RefCell::new(Stacks::new_allocation(
@@ -280,6 +281,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         kind: RetagKind,
         val: &ImmTy<'tcx>,
     ) -> InterpResult<'tcx, ImmTy<'tcx>> {
+        let _span = enter_trace_span!(borrow_tracker::retag_ptr_value, ?kind, ?val.layout);
         let this = self.eval_context_mut();
         let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
         match method {
@@ -293,6 +295,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         kind: RetagKind,
         place: &PlaceTy<'tcx>,
     ) -> InterpResult<'tcx> {
+        let _span = enter_trace_span!(borrow_tracker::retag_place_contents, ?kind, ?place);
         let this = self.eval_context_mut();
         let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
         match method {
@@ -302,6 +305,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     }
 
     fn protect_place(&mut self, place: &MPlaceTy<'tcx>) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
+        let _span = enter_trace_span!(borrow_tracker::protect_place, ?place);
         let this = self.eval_context_mut();
         let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
         match method {
@@ -311,6 +315,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     }
 
     fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
+        let _span =
+            enter_trace_span!(borrow_tracker::expose_tag, alloc_id = alloc_id.0, tag = tag.0);
         let this = self.eval_context_ref();
         let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
         match method {
@@ -354,6 +360,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         &self,
         frame: &Frame<'tcx, Provenance, FrameExtra<'tcx>>,
     ) -> InterpResult<'tcx> {
+        let _span = enter_trace_span!(borrow_tracker::on_stack_pop);
         let this = self.eval_context_ref();
         let borrow_tracker = this.machine.borrow_tracker.as_ref().unwrap();
         // The body of this loop needs `borrow_tracker` immutably
@@ -431,6 +438,7 @@ impl AllocState {
         range: AllocRange,
         machine: &MiriMachine<'tcx>,
     ) -> InterpResult<'tcx> {
+        let _span = enter_trace_span!(borrow_tracker::before_memory_read, alloc_id = alloc_id.0);
         match self {
             AllocState::StackedBorrows(sb) =>
                 sb.borrow_mut().before_memory_read(alloc_id, prov_extra, range, machine),
@@ -452,6 +460,7 @@ impl AllocState {
         range: AllocRange,
         machine: &MiriMachine<'tcx>,
     ) -> InterpResult<'tcx> {
+        let _span = enter_trace_span!(borrow_tracker::before_memory_write, alloc_id = alloc_id.0);
         match self {
             AllocState::StackedBorrows(sb) =>
                 sb.get_mut().before_memory_write(alloc_id, prov_extra, range, machine),
@@ -473,6 +482,8 @@ impl AllocState {
         size: Size,
         machine: &MiriMachine<'tcx>,
     ) -> InterpResult<'tcx> {
+        let _span =
+            enter_trace_span!(borrow_tracker::before_memory_deallocation, alloc_id = alloc_id.0);
         match self {
             AllocState::StackedBorrows(sb) =>
                 sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
@@ -482,6 +493,7 @@ impl AllocState {
     }
 
     pub fn remove_unreachable_tags(&self, tags: &FxHashSet<BorTag>) {
+        let _span = enter_trace_span!(borrow_tracker::remove_unreachable_tags);
         match self {
             AllocState::StackedBorrows(sb) => sb.borrow_mut().remove_unreachable_tags(tags),
             AllocState::TreeBorrows(tb) => tb.borrow_mut().remove_unreachable_tags(tags),
@@ -496,6 +508,11 @@ impl AllocState {
         tag: BorTag,
         alloc_id: AllocId, // diagnostics
     ) -> InterpResult<'tcx> {
+        let _span = enter_trace_span!(
+            borrow_tracker::release_protector,
+            alloc_id = alloc_id.0,
+            tag = tag.0
+        );
         match self {
             AllocState::StackedBorrows(_sb) => interp_ok(()),
             AllocState::TreeBorrows(tb) =>
@@ -506,6 +523,7 @@ impl AllocState {
 
 impl VisitProvenance for AllocState {
     fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
+        let _span = enter_trace_span!(borrow_tracker::visit_provenance);
         match self {
             AllocState::StackedBorrows(sb) => sb.visit_provenance(visit),
             AllocState::TreeBorrows(tb) => tb.visit_provenance(visit),
diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
index b8bcacf7c9943..834a4b41f2245 100644
--- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs
@@ -30,7 +30,7 @@ pub type AllocState = Stacks;
 #[derive(Clone, Debug)]
 pub struct Stacks {
     // Even reading memory can have effects on the stack, so we need a `RefCell` here.
-    stacks: RangeMap<Stack>,
+    stacks: DedupRangeMap<Stack>,
     /// Stores past operations on this allocation
     history: AllocHistory,
     /// The set of tags that have been exposed inside this allocation.
@@ -468,7 +468,7 @@ impl<'tcx> Stacks {
         let stack = Stack::new(item);
 
         Stacks {
-            stacks: RangeMap::new(size, stack),
+            stacks: DedupRangeMap::new(size, stack),
             history: AllocHistory::new(id, item, machine),
             exposed_tags: FxHashSet::default(),
         }
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
index a0761cb07a1da..c157c69d7c843 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs
@@ -314,7 +314,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let span = this.machine.current_span();
 
         // Store initial permissions and their corresponding range.
-        let mut perms_map: RangeMap<LocationState> = RangeMap::new(
+        let mut perms_map: DedupRangeMap<LocationState> = DedupRangeMap::new(
             ptr_size,
             LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten
         );
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
index 48e4a19e263fb..1f29bcfc2b035 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
@@ -247,7 +247,7 @@ pub struct Tree {
     /// `unwrap` any `perm.get(key)`.
     ///
     /// We do uphold the fact that `keys(perms)` is a subset of `keys(nodes)`
-    pub(super) rperms: RangeMap<UniValMap<LocationState>>,
+    pub(super) rperms: DedupRangeMap<UniValMap<LocationState>>,
     /// The index of the root node.
     pub(super) root: UniIndex,
 }
@@ -609,7 +609,7 @@ impl Tree {
                 IdempotentForeignAccess::None,
             ),
         );
-            RangeMap::new(size, perms)
+            DedupRangeMap::new(size, perms)
         };
         Self { root: root_idx, nodes, rperms, tag_mapping }
     }
@@ -631,7 +631,7 @@ impl<'tcx> Tree {
         base_offset: Size,
         parent_tag: BorTag,
         new_tag: BorTag,
-        initial_perms: RangeMap<LocationState>,
+        initial_perms: DedupRangeMap<LocationState>,
         default_perm: Permission,
         protected: bool,
         span: Span,
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index b5e7e9d0ac0d8..38d76f5cf73b7 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -997,7 +997,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
 #[derive(Debug, Clone)]
 pub struct VClockAlloc {
     /// Assigning each byte a MemoryCellClocks.
-    alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>,
+    alloc_ranges: RefCell<DedupRangeMap<MemoryCellClocks>>,
 }
 
 impl VisitProvenance for VClockAlloc {
@@ -1045,7 +1045,7 @@ impl VClockAlloc {
                 (VTimestamp::ZERO, global.thread_index(ThreadId::MAIN_THREAD)),
         };
         VClockAlloc {
            alloc_ranges: RefCell::new(DedupRangeMap::new(
-            alloc_ranges: RefCell::new(RangeMap::new(
+            alloc_ranges: RefCell::new(DedupRangeMap::new(
                 len,
                 MemoryCellClocks::new(alloc_timestamp, alloc_index),
             )),
diff --git a/src/tools/miri/src/concurrency/mod.rs b/src/tools/miri/src/concurrency/mod.rs
index 49bcc0d30b506..c2ea8a00decd8 100644
--- a/src/tools/miri/src/concurrency/mod.rs
+++ b/src/tools/miri/src/concurrency/mod.rs
@@ -2,7 +2,6 @@ pub mod cpu_affinity;
 pub mod data_race;
 mod data_race_handler;
 pub mod init_once;
-mod range_object_map;
 pub mod sync;
 pub mod thread;
 mod vector_clock;
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index a4e93f9222c23..878afdf251731 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -186,15 +186,15 @@ pub struct Thread<'tcx> {
     /// The join status.
     join_status: ThreadJoinStatus,
 
-    /// Stack of active panic payloads for the current thread. Used for storing
-    /// the argument of the call to `miri_start_unwind` (the panic payload) when unwinding.
+    /// Stack of active unwind payloads for the current thread. Used for storing
+    /// the argument of the call to `miri_start_unwind` (the payload) when unwinding.
     /// This is pointer-sized, and matches the `Payload` type in `src/libpanic_unwind/miri.rs`.
     ///
     /// In real unwinding, the payload gets passed as an argument to the landing pad,
     /// which then forwards it to 'Resume'. However this argument is implicit in MIR,
     /// so we have to store it out-of-band. When there are multiple active unwinds,
     /// the innermost one is always caught first, so we can store them as a stack.
-    pub(crate) panic_payloads: Vec<ImmTy<'tcx>>,
+    pub(crate) unwind_payloads: Vec<ImmTy<'tcx>>,
 
     /// Last OS error location in memory. It is a 32-bit integer.
     pub(crate) last_error: Option<MPlaceTy<'tcx>>,
@@ -282,7 +282,7 @@ impl<'tcx> Thread<'tcx> {
             stack: Vec::new(),
             top_user_relevant_frame: None,
             join_status: ThreadJoinStatus::Joinable,
-            panic_payloads: Vec::new(),
+            unwind_payloads: Vec::new(),
             last_error: None,
             on_stack_empty,
         }
@@ -292,7 +292,7 @@ impl<'tcx> Thread<'tcx> {
 impl VisitProvenance for Thread<'_> {
     fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
         let Thread {
-            panic_payloads: panic_payload,
+            unwind_payloads: panic_payload,
             last_error,
             stack,
             top_user_relevant_frame: _,
@@ -677,6 +677,8 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> {
         let this = self.eval_context_mut();
         // Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling.
+        // FIXME(GenMC): Thread-local destructors *are* user code, so this is odd. Also now that we
+        // support pre-main constructors, it can get called there as well.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
             let thread_id = this.active_thread();
             genmc_ctx.handle_thread_stack_empty(thread_id);
diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs
index 95c010be2fd21..a752ef7e45480 100644
--- a/src/tools/miri/src/concurrency/weak_memory.rs
+++ b/src/tools/miri/src/concurrency/weak_memory.rs
@@ -90,9 +90,9 @@ use rustc_data_structures::fx::FxHashMap;
 
 use super::AllocDataRaceHandler;
 use super::data_race::{GlobalState as DataRaceState, ThreadClockSet};
-use super::range_object_map::{AccessType, RangeObjectMap};
 use super::vector_clock::{VClock, VTimestamp, VectorIdx};
 use crate::concurrency::GlobalDataRaceHandler;
+use crate::data_structures::range_object_map::{AccessType, RangeObjectMap};
 use crate::*;
 
 pub type AllocState = StoreBufferAlloc;
diff --git a/src/tools/miri/src/range_map.rs b/src/tools/miri/src/data_structures/dedup_range_map.rs
similarity index 95%
rename from src/tools/miri/src/range_map.rs
rename to src/tools/miri/src/data_structures/dedup_range_map.rs
index 29a5a8537a41d..56e9883b1caaf 100644
--- a/src/tools/miri/src/range_map.rs
+++ b/src/tools/miri/src/data_structures/dedup_range_map.rs
@@ -17,18 +17,18 @@ struct Elem<T> {
     data: T,
 }
 #[derive(Clone, Debug)]
-pub struct RangeMap<T> {
+pub struct DedupRangeMap<T> {
     v: Vec<Elem<T>>,
 }
 
-impl<T> RangeMap<T> {
+impl<T> DedupRangeMap<T> {
     /// Creates a new `RangeMap` for the given size, and with the given initial value used for
     /// the entire range.
     #[inline(always)]
-    pub fn new(size: Size, init: T) -> RangeMap<T> {
+    pub fn new(size: Size, init: T) -> DedupRangeMap<T> {
         let size = size.bytes();
         let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() };
-        RangeMap { v }
+        DedupRangeMap { v }
     }
 
     pub fn size(&self) -> Size {
@@ -246,7 +246,7 @@ mod tests {
     use super::*;
 
     /// Query the map at every offset in the range and collect the results.
-    fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
+    fn to_vec<T: Copy>(map: &DedupRangeMap<T>, offset: u64, len: u64) -> Vec<T> {
         (offset..offset + len)
             .map(|i| {
                 map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
@@ -256,7 +256,7 @@ mod tests {
 
     #[test]
     fn basic_insert() {
-        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
+        let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
         // Insert.
         for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
             *x = 42;
@@ -278,7 +278,7 @@ mod tests {
 
     #[test]
     fn gaps() {
-        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
+        let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
         for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
             *x = 42;
         }
@@ -319,26 +319,26 @@ mod tests {
     #[test]
     #[should_panic]
     fn out_of_range_iter_mut() {
-        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
+        let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
         let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
     }
 
     #[test]
     #[should_panic]
     fn out_of_range_iter() {
-        let map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
+        let map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
         let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
     }
 
     #[test]
     fn empty_map_iter() {
-        let map = RangeMap::<i32>::new(Size::from_bytes(0), -1);
+        let map = DedupRangeMap::<i32>::new(Size::from_bytes(0), -1);
         let _ = map.iter(Size::from_bytes(0), Size::from_bytes(0));
     }
 
     #[test]
     fn empty_map_iter_mut() {
-        let mut map = RangeMap::<i32>::new(Size::from_bytes(0), -1);
+        let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(0), -1);
         let _ = map.iter_mut(Size::from_bytes(0), Size::from_bytes(0));
     }
 }
diff --git a/src/tools/miri/src/data_structures/mod.rs b/src/tools/miri/src/data_structures/mod.rs
new file mode 100644
index 0000000000000..d4468bc57eae9
--- /dev/null
+++ b/src/tools/miri/src/data_structures/mod.rs
@@ -0,0 +1,3 @@
+pub mod dedup_range_map;
+pub mod mono_hash_map;
+pub mod range_object_map;
diff --git a/src/tools/miri/src/mono_hash_map.rs b/src/tools/miri/src/data_structures/mono_hash_map.rs
similarity index 100%
rename from src/tools/miri/src/mono_hash_map.rs
rename to src/tools/miri/src/data_structures/mono_hash_map.rs
diff --git a/src/tools/miri/src/concurrency/range_object_map.rs b/src/tools/miri/src/data_structures/range_object_map.rs
similarity index 100%
rename from src/tools/miri/src/concurrency/range_object_map.rs
rename to src/tools/miri/src/data_structures/range_object_map.rs
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index 425a136dfa54c..be6404f64e8f8 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -11,14 +11,14 @@ use rustc_abi::ExternAbi;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_hir::def::Namespace;
 use rustc_hir::def_id::DefId;
-use rustc_middle::ty::layout::LayoutCx;
+use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx};
 use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_session::config::EntryFnType;
 
 use crate::concurrency::GenmcCtx;
 use crate::concurrency::thread::TlsAllocAction;
 use crate::diagnostics::report_leaks;
-use crate::shims::tls;
+use crate::shims::{global_ctor, tls};
 use crate::*;
 
 #[derive(Copy, Clone, Debug)]
@@ -216,9 +216,17 @@ impl Default for MiriConfig {
 }
 
 /// The state of the main thread. Implementation detail of `on_main_stack_empty`.
-#[derive(Default, Debug)]
+#[derive(Debug)]
 enum MainThreadState<'tcx> {
-    #[default]
+    GlobalCtors {
+        ctor_state: global_ctor::GlobalCtorState<'tcx>,
+        /// The main function to call.
+        entry_id: DefId,
+        entry_type: MiriEntryFnType,
+        /// Arguments passed to `main`.
+        argc: ImmTy<'tcx>,
+        argv: ImmTy<'tcx>,
+    },
     Running,
     TlsDtors(tls::TlsDtorsState<'tcx>),
     Yield {
@@ -234,6 +242,15 @@ impl<'tcx> MainThreadState<'tcx> {
     ) -> InterpResult<'tcx, Poll<()>> {
         use MainThreadState::*;
         match self {
+            GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => {
+                match ctor_state.on_stack_empty(this)? {
+                    Poll::Pending => {} // just keep going
+                    Poll::Ready(()) => {
+                        call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?;
+                        *self = Running;
+                    }
+                }
+            }
             Running => {
                 *self = TlsDtors(Default::default());
             }
@@ -309,13 +326,6 @@ pub fn create_ecx<'tcx>(
         MiriMachine::new(config, layout_cx, genmc_ctx),
     );
 
-    // Some parts of initialization require a full `InterpCx`.
-    MiriMachine::late_init(&mut ecx, config, {
-        let mut state = MainThreadState::default();
-        // Cannot capture anything GC-relevant here.
-        Box::new(move |m| state.on_main_stack_empty(m))
-    })?;
-
     // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
     let sentinel =
         helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
@@ -326,15 +336,9 @@ pub fn create_ecx<'tcx>(
         );
     }
 
-    // Setup first stack frame.
-    let entry_instance = ty::Instance::mono(tcx, entry_id);
-
-    // First argument is constructed later, because it's skipped for `miri_start`.
-
-    // Second argument (argc): length of `config.args`.
+    // Compute argc and argv from `config.args`.
     let argc =
         ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize);
-    // Third argument (`argv`): created from `config.args`.
     let argv = {
         // Put each argument in memory, collect pointers.
         let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len());
@@ -359,7 +363,7 @@ pub fn create_ecx<'tcx>(
             ecx.write_immediate(arg, &place)?;
         }
         ecx.mark_immutable(&argvs_place);
-        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
+        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`, and for the GC to see them.
         {
             let argc_place =
                 ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
@@ -374,7 +378,7 @@ pub fn create_ecx<'tcx>(
             ecx.machine.argv = Some(argv_place.ptr());
         }
         // Store command line as UTF-16 for Windows `GetCommandLineW`.
-        {
+        if tcx.sess.target.os == "windows" {
             // Construct a command string with all the arguments.
             let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
 
@@ -395,11 +399,43 @@ pub fn create_ecx<'tcx>(
         ImmTy::from_immediate(imm, layout)
     };
 
+    // Some parts of initialization require a full `InterpCx`.
+    MiriMachine::late_init(&mut ecx, config, {
+        let mut main_thread_state = MainThreadState::GlobalCtors {
+            entry_id,
+            entry_type,
+            argc,
+            argv,
+            ctor_state: global_ctor::GlobalCtorState::default(),
+        };
+
+        // Cannot capture anything GC-relevant here.
+        // `argc` and `argv` *are* GC-relevant, but they also get stored in `machine.argc` and
+        // `machine.argv` so we are good.
+        Box::new(move |m| main_thread_state.on_main_stack_empty(m))
+    })?;
+
+    interp_ok(ecx)
+}
+
+// Call the entry function.
+fn call_main<'tcx>(
+    ecx: &mut MiriInterpCx<'tcx>,
+    entry_id: DefId,
+    entry_type: MiriEntryFnType,
+    argc: ImmTy<'tcx>,
+    argv: ImmTy<'tcx>,
+) -> InterpResult<'tcx, ()> {
+    let tcx = ecx.tcx();
+
+    // Setup first stack frame.
+    let entry_instance = ty::Instance::mono(tcx, entry_id);
+
     // Return place (in static memory so that it does not count as leak).
     let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
     ecx.machine.main_fn_ret_place = Some(ret_place.clone());
 
-    // Call start function.
+    // Call start function.
     match entry_type {
         MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
             let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
@@ -409,7 +445,7 @@ pub fn create_ecx<'tcx>(
             let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
             let start_instance = ty::Instance::try_resolve(
                 tcx,
-                typing_env,
+                ecx.typing_env(),
                 start_id,
                 tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
             )
@@ -427,7 +463,7 @@ pub fn create_ecx<'tcx>(
                 ExternAbi::Rust,
                 &[
                     ImmTy::from_scalar(
-                        Scalar::from_pointer(main_ptr, &ecx),
+                        Scalar::from_pointer(main_ptr, ecx),
                         // FIXME use a proper fn ptr type
                         ecx.machine.layouts.const_raw_ptr,
                     ),
@@ -450,7 +486,7 @@ pub fn create_ecx<'tcx>(
         }
     }
 
-    interp_ok(ecx)
+    interp_ok(())
 }
 
 /// Evaluates the entry function specified by `entry_id`.
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index c150dc16b0729..35d3e7e647ccf 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -1235,8 +1235,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(())
     }
 
-    /// Lookup an array of immediates stored as a linker section of name `name`.
-    fn lookup_link_section(&mut self, name: &str) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
+    /// Lookup an array of immediates from any linker sections matching the provided predicate.
+    fn lookup_link_section(
+        &mut self,
+        include_name: impl Fn(&str) -> bool,
+    ) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
         let this = self.eval_context_mut();
         let tcx = this.tcx.tcx;
 
@@ -1247,7 +1250,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             let Some(link_section) = attrs.link_section else {
                 return interp_ok(());
             };
-            if link_section.as_str() == name {
+            if include_name(link_section.as_str()) {
                 let instance = ty::Instance::mono(tcx, def_id);
                 let const_val = this.eval_global(instance).unwrap_or_else(|err| {
                     panic!(
@@ -1431,3 +1434,44 @@ impl ToU64 for usize {
         self.try_into().unwrap()
     }
 }
+
+/// This struct is needed to enforce `#[must_use]` on values produced by [enter_trace_span] even
+/// when the "tracing" feature is not enabled.
+#[must_use]
+pub struct MaybeEnteredTraceSpan {
+    #[cfg(feature = "tracing")]
+    pub _entered_span: tracing::span::EnteredSpan,
+}
+
+/// Enters a [tracing::info_span] only if the "tracing" feature is enabled, otherwise does nothing.
+/// This is like [rustc_const_eval::enter_trace_span] except that it does not depend on the
+/// [Machine] trait to check if tracing is enabled, because from the Miri codebase we can directly
+/// check whether the "tracing" feature is enabled, unlike from the rustc_const_eval codebase.
+///
+/// In addition to the syntax accepted by [tracing::span!], this macro optionally allows passing
+/// the span name (i.e. the first macro argument) in the form `NAME::SUBNAME` (without quotes) to
+/// indicate that the span has name "NAME" (usually the name of the component) and has an additional
+/// more specific name "SUBNAME" (usually the function name). The latter is passed to the [tracing]
+/// infrastructure as a span field with the name "NAME". This allows not being distracted by
+/// subnames when looking at the trace in <https://ui.perfetto.dev>, but when deeper introspection
+/// is needed within a component, it's still possible to view the subnames directly in the UI by
+/// selecting a span, clicking on the "NAME" argument on the right, and clicking on "Visualize
+/// argument values".
+/// ```rust
+/// // for example, the first will expand to the second
+/// enter_trace_span!(borrow_tracker::on_stack_pop, /* ... */)
+/// enter_trace_span!("borrow_tracker", borrow_tracker = "on_stack_pop", /* ... */)
+/// ```
+#[macro_export]
+macro_rules! enter_trace_span {
+    ($name:ident :: $subname:ident $($tt:tt)*) => {{
+        enter_trace_span!(stringify!($name), $name = %stringify!($subname) $($tt)*)
+    }};
+
+    ($($tt:tt)*) => {
+        $crate::MaybeEnteredTraceSpan {
+            #[cfg(feature = "tracing")]
+            _entered_span: tracing::info_span!($($tt)*).entered()
+        }
+    };
+}
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index c86e33e518591..a591d21071db6 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -15,6 +15,7 @@
 #![feature(unqualified_local_imports)]
 #![feature(derive_coerce_pointee)]
 #![feature(arbitrary_self_types)]
+#![feature(iter_advance_by)]
 // Configure clippy and other lints
 #![allow(
     clippy::collapsible_else_if,
@@ -75,16 +76,15 @@ mod alloc_addresses;
 mod borrow_tracker;
 mod clock;
 mod concurrency;
+mod data_structures;
 mod diagnostics;
 mod eval;
 mod helpers;
 mod intrinsics;
 mod machine;
 mod math;
-mod mono_hash_map;
 mod operator;
 mod provenance_gc;
-mod range_map;
 mod shims;
 
 // Establish a "crate-wide prelude": we often import `crate::*`.
@@ -97,10 +97,10 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _};
 use rustc_middle::{bug, span_bug};
 use tracing::{info, trace};
 
-//#[cfg(target_os = "linux")]
-//pub mod native_lib {
-//    pub use crate::shims::{init_sv, register_retcode_sv};
-//}
+#[cfg(target_os = "linux")]
+pub mod native_lib {
+    pub use crate::shims::{init_sv, register_retcode_sv};
+}
 
 // Type aliases that set the provenance parameter.
 pub type Pointer = interpret::Pointer<Option<Provenance>>;
@@ -132,6 +132,8 @@ pub use crate::concurrency::thread::{
     ThreadManager, TimeoutAnchor, TimeoutClock, UnblockKind,
 };
 pub use crate::concurrency::{GenmcConfig, GenmcCtx};
+pub use crate::data_structures::dedup_range_map::DedupRangeMap;
+pub use crate::data_structures::mono_hash_map::MonoHashMap;
 pub use crate::diagnostics::{
     EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error,
 };
@@ -139,24 +141,25 @@ pub use crate::eval::{
     AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith,
     ValidationMode, create_ecx, eval_entry,
 };
-pub use crate::helpers::{AccessKind, EvalContextExt as _, ToU64 as _, ToUsize as _};
+pub use crate::helpers::{
+    AccessKind, EvalContextExt as _, MaybeEnteredTraceSpan, ToU64 as _, ToUsize as _,
+};
 pub use crate::intrinsics::EvalContextExt as _;
 pub use crate::machine::{
     AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx,
     MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra,
 };
-pub use crate::mono_hash_map::MonoHashMap;
 pub use crate::operator::EvalContextExt as _;
 pub use crate::provenance_gc::{EvalContextExt as _, LiveAllocs, VisitProvenance, VisitWith};
-pub use crate::range_map::RangeMap;
 pub use crate::shims::EmulateItemResult;
 pub use crate::shims::env::{EnvVars, EvalContextExt as _};
 pub use crate::shims::foreign_items::{DynSym, EvalContextExt as _};
 pub use crate::shims::io_error::{EvalContextExt as _, IoError, LibcError};
 pub use crate::shims::os_str::EvalContextExt as _;
-pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _};
+pub use crate::shims::panic::EvalContextExt as _;
 pub use crate::shims::time::EvalContextExt as _;
 pub use crate::shims::tls::TlsData;
+pub use crate::shims::unwind::{CatchUnwindData, EvalContextExt as _};
 
 /// Insert rustc arguments at the beginning of the argument list that Miri wants to be
 /// set per default, for maximal validation power.
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 35399dbf4cb0c..9c077e5b7b6e9 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1828,8 +1828,11 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
 
     fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan {
         #[cfg(feature = "tracing")]
-        { span().entered() }
+        {
+            span().entered()
+        }
         #[cfg(not(feature = "tracing"))]
+        #[expect(clippy::unused_unit)]
         {
             let _ = span; // so we avoid the "unused variable" warning
             ()
diff --git a/src/tools/miri/src/shims/global_ctor.rs b/src/tools/miri/src/shims/global_ctor.rs
new file mode 100644
index 0000000000000..c56251bbe63ae
--- /dev/null
+++ b/src/tools/miri/src/shims/global_ctor.rs
@@ -0,0 +1,98 @@
+//! Implement global constructors.
+
+use std::task::Poll;
+
+use rustc_abi::ExternAbi;
+use rustc_target::spec::BinaryFormat;
+
+use crate::*;
+
+#[derive(Debug, Default)]
+pub struct GlobalCtorState<'tcx>(GlobalCtorStatePriv<'tcx>);
+
+#[derive(Debug, Default)]
+enum GlobalCtorStatePriv<'tcx> {
+    #[default]
+    Init,
+    /// The list of constructor functions that we still have to call.
+    Ctors(Vec<ImmTy<'tcx>>),
+    Done,
+}
+
+impl<'tcx> GlobalCtorState<'tcx> {
+    pub fn on_stack_empty(
+        &mut self,
+        this: &mut MiriInterpCx<'tcx>,
+    ) -> InterpResult<'tcx, Poll<()>> {
+        use GlobalCtorStatePriv::*;
+        let new_state = 'new_state: {
+            match &mut self.0 {
+                Init => {
+                    let this = this.eval_context_mut();
+
+                    // Lookup constructors from the relevant magic link section.
+ let ctors = match this.tcx.sess.target.binary_format { + // Read the CRT library section on Windows. + BinaryFormat::Coff => + this.lookup_link_section(|section| section == ".CRT$XCU")?, + + // Read the `__mod_init_func` section on macOS. + BinaryFormat::MachO => + this.lookup_link_section(|section| { + let mut parts = section.splitn(3, ','); + let (segment_name, section_name, section_type) = + (parts.next(), parts.next(), parts.next()); + + segment_name == Some("__DATA") + && section_name == Some("__mod_init_func") + // The `mod_init_funcs` directive ensures that the + // `S_MOD_INIT_FUNC_POINTERS` flag is set on the section. LLVM + // adds this automatically so we currently do not require it. + // FIXME: is this guaranteed LLVM behavior? If not, we shouldn't + // implicitly add it here. Also see + // . + && matches!(section_type, None | Some("mod_init_funcs")) + })?, + + // Read the standard `.init_array` section on platforms that use ELF, or WASM, + // which supports the same linker directive. + // FIXME: Add support for `.init_array.N` and `.ctors`? + BinaryFormat::Elf | BinaryFormat::Wasm => + this.lookup_link_section(|section| section == ".init_array")?, + + // Other platforms have no global ctor support. + _ => break 'new_state Done, + }; + + break 'new_state Ctors(ctors); + } + Ctors(ctors) => { + if let Some(ctor) = ctors.pop() { + let this = this.eval_context_mut(); + + let ctor = ctor.to_scalar().to_pointer(this)?; + let thread_callback = this.get_ptr_fn(ctor)?.as_instance()?; + + // The signature of this function is `unsafe extern "C" fn()`. + this.call_function( + thread_callback, + ExternAbi::C { unwind: false }, + &[], + None, + ReturnContinuation::Stop { cleanup: true }, + )?; + + return interp_ok(Poll::Pending); // we stay in this state (but `ctors` got shorter) + } + + // No more constructors to run. + break 'new_state Done; + } + Done => return interp_ok(Poll::Ready(())), + } + }; + + self.0 = new_state; + interp_ok(Poll::Pending) + } +} diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index f09fc546b3eba..75540f6f15000 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -14,15 +14,17 @@ mod x86; pub mod env; pub mod extern_static; pub mod foreign_items; +pub mod global_ctor; pub mod io_error; pub mod os_str; pub mod panic; pub mod time; pub mod tls; +pub mod unwind; pub use self::files::FdTable; -//#[cfg(target_os = "linux")] -//pub use self::native_lib::trace::{init_sv, register_retcode_sv}; +#[cfg(target_os = "linux")] +pub use self::native_lib::trace::{init_sv, register_retcode_sv}; pub use self::unix::{DirTable, EpollInterestTable}; /// What needs to be done after emulating an item (a shim or an intrinsic) is done. diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs index 02c3bde036d48..fb7b1df41a4cf 100644 --- a/src/tools/miri/src/shims/native_lib/mod.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -1,9 +1,5 @@ //! Implements calling functions from a native library. -// FIXME: disabled since it fails to build on many targets. 
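For orientation, here is roughly what a constructor discovered by the `lookup_link_section` scan in `global_ctor.rs` above looks like from the program's side. This is a hedged sketch for the ELF case, using edition-2021 attribute syntax; the COFF (`.CRT$XCU`) and Mach-O (`__DATA,__mod_init_func`) section names are the ones the match arms above recognise, and the exact attribute spelling is not taken from this diff:

```rust
// Hypothetical user code: a function pointer placed in `.init_array`.
unsafe extern "C" fn ctor() {
    // Runs before `main` is called.
}

#[used] // keep the static alive so it actually ends up in the section
#[link_section = ".init_array"]
static CTOR_ENTRY: unsafe extern "C" fn() = ctor;
```

Each such static evaluates to a function pointer, which is why the interpreter can take the `ImmTy` from the section, convert it with `get_ptr_fn`, and push a call frame for it before `main`.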
-//#[cfg(target_os = "linux")]
-//pub mod trace;
-
 use std::ops::Deref;
 
 use libffi::high::call as ffi;
@@ -13,14 +9,68 @@ use rustc_middle::mir::interpret::Pointer;
 use rustc_middle::ty::{self as ty, IntTy, UintTy};
 use rustc_span::Symbol;
 
-//#[cfg(target_os = "linux")]
-//use self::trace::Supervisor;
+#[cfg_attr(
+    not(all(
+        target_os = "linux",
+        target_env = "gnu",
+        any(target_arch = "x86", target_arch = "x86_64")
+    )),
+    path = "trace/stub.rs"
+)]
+pub mod trace;
+
 use crate::*;
 
-//#[cfg(target_os = "linux")]
-//type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<MemEvents>)>;
-//#[cfg(not(target_os = "linux"))]
-type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<MemEvents>)>;
+/// The final results of an FFI trace, containing every relevant event detected
+/// by the tracer.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Debug)]
+pub struct MemEvents {
+    /// A list of memory accesses that occurred, in the order they occurred in.
+    pub acc_events: Vec<AccessEvent>,
+}
+
+/// A single memory access.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Clone, Debug)]
+pub enum AccessEvent {
+    /// A read occurred on this memory range.
+    Read(AccessRange),
+    /// A write may have occurred on this memory range.
+    /// Some instructions *may* write memory without *always* doing that,
+    /// so this can be an over-approximation.
+    /// The range info, however, is reliable if the access did happen.
+    /// If the second field is true, the access definitely happened.
+    Write(AccessRange, bool),
+}
+
+impl AccessEvent {
+    fn get_range(&self) -> AccessRange {
+        match self {
+            AccessEvent::Read(access_range) => access_range.clone(),
+            AccessEvent::Write(access_range, _) => access_range.clone(),
+        }
+    }
+}
+
+/// The memory touched by a given access.
+#[allow(dead_code)]
+#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
+#[derive(Clone, Debug)]
+pub struct AccessRange {
+    /// The base address in memory where an access occurred.
+    pub addr: usize,
+    /// The number of bytes affected from the base.
+    pub size: usize,
+}
+
+impl AccessRange {
+    fn end(&self) -> usize {
+        self.addr.strict_add(self.size)
+    }
+}
 
 impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
 trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
@@ -31,18 +81,17 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         dest: &MPlaceTy<'tcx>,
         ptr: CodePtr,
         libffi_args: Vec<libffi::high::Arg<'_>>,
-    ) -> CallResult<'tcx> {
+    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
         let this = self.eval_context_mut();
-        //#[cfg(target_os = "linux")]
-        //let alloc = this.machine.allocator.as_ref().unwrap();
-
-        // SAFETY: We don't touch the machine memory past this point.
-        //#[cfg(target_os = "linux")]
-        //let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) };
+        #[cfg(target_os = "linux")]
+        let alloc = this.machine.allocator.as_ref().unwrap();
+        #[cfg(not(target_os = "linux"))]
+        // Placeholder value.
+ let alloc = (); - // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value - // as the specified primitive integer type - let res = 'res: { + trace::Supervisor::do_ffi(alloc, || { + // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value + // as the specified primitive integer type let scalar = match dest.layout.ty.kind() { // ints ty::Int(IntTy::I8) => { @@ -93,7 +142,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // have the output_type `Tuple([])`. ty::Tuple(t_list) if (*t_list).deref().is_empty() => { unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; - break 'res interp_ok(ImmTy::uninit(dest.layout)); + return interp_ok(ImmTy::uninit(dest.layout)); } ty::RawPtr(..) => { let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; @@ -101,23 +150,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { Scalar::from_pointer(ptr, this) } _ => - break 'res Err(err_unsup_format!( + return Err(err_unsup_format!( "unsupported return type for native call: {:?}", link_name )) .into(), }; interp_ok(ImmTy::from_scalar(scalar, dest.layout)) - }; - - // SAFETY: We got the guard and stack pointer from start_ffi, and - // the allocator is the same - //#[cfg(target_os = "linux")] - //let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) }; - //#[cfg(not(target_os = "linux"))] - let events = None; - - interp_ok((res?, events)) + }) } /// Get the pointer to the function of the specified name in the shared object file, @@ -169,6 +209,73 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } None } + + /// Applies the `events` to Miri's internal state. The event vector must be + /// ordered sequentially by when the accesses happened, and the sizes are + /// assumed to be exact. + fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + for evt in events.acc_events { + let evt_rg = evt.get_range(); + // LLVM at least permits vectorising accesses to adjacent allocations, + // so we cannot assume 1 access = 1 allocation. :( + let mut rg = evt_rg.addr..evt_rg.end(); + while let Some(curr) = rg.next() { + let Some(alloc_id) = this.alloc_id_from_addr( + curr.to_u64(), + rg.len().try_into().unwrap(), + /* only_exposed_allocations */ true, + ) else { + throw_ub_format!("Foreign code did an out-of-bounds access!") + }; + let alloc = this.get_alloc_raw(alloc_id)?; + // The logical and physical address of the allocation coincide, so we can use + // this instead of `addr_from_alloc_id`. + let alloc_addr = alloc.get_bytes_unchecked_raw().addr(); + + // Determine the range inside the allocation that this access covers. This range is + // in terms of offsets from the start of `alloc`. The start of the overlap range + // will be `curr`; the end will be the minimum of the end of the allocation and the + // end of the access' range. + let overlap = curr.strict_sub(alloc_addr) + ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr)); + // Skip forward however many bytes of the access are contained in the current + // allocation, subtracting 1 since the overlap range includes the current addr + // that was already popped off of the range. + rg.advance_by(overlap.len().strict_sub(1)).unwrap(); + + match evt { + AccessEvent::Read(_) => { + // FIXME: ProvenanceMap should have something like get_range(). + let p_map = alloc.provenance(); + for idx in overlap { + // If a provenance was read by the foreign code, expose it. 
+                            if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
+                                this.expose_provenance(prov)?;
+                            }
+                        }
+                    }
+                    AccessEvent::Write(_, certain) => {
+                        // Sometimes we aren't certain if a write happened, in which case we
+                        // only initialise that data if the allocation is mutable.
+                        if certain || alloc.mutability.is_mut() {
+                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
+                            alloc.process_native_write(
+                                &cx.tcx,
+                                Some(AllocRange {
+                                    start: Size::from_bytes(overlap.start),
+                                    size: Size::from_bytes(overlap.len()),
+                                }),
+                            )
+                        }
+                    }
+                }
+            }
+        }
+
+        interp_ok(())
+    }
 }
 
 impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
@@ -194,6 +301,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             }
         };
 
+        // Do we have ptrace?
+        let tracing = trace::Supervisor::is_enabled();
+
         // Get the function arguments, and convert them to `libffi`-compatible form.
         let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
         for arg in args.iter() {
@@ -213,12 +323,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // The first time this happens, print a warning.
                 if !this.machine.native_call_mem_warned.replace(true) {
                     // Newly set, so first time we get here.
-                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
-                        //#[cfg(target_os = "linux")]
-                        //tracing: self::trace::Supervisor::is_enabled(),
-                        //#[cfg(not(target_os = "linux"))]
-                        tracing: false,
-                    });
+                    this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
                 }
 
                 this.expose_provenance(prov)?;
@@ -245,15 +350,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // be read by FFI. The `black_box` is defensive programming as LLVM likes
             // to (incorrectly) optimize away ptr2int casts whose result is unused.
             std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-            // Expose all provenances in this allocation, since the native code can do $whatever.
-            for prov in alloc.provenance().provenances() {
-                this.expose_provenance(prov)?;
+
+            if !tracing {
+                // Expose all provenances in this allocation, since the native code can do $whatever.
+                // Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
+                for prov in alloc.provenance().provenances() {
+                    this.expose_provenance(prov)?;
+                }
             }
 
             // Prepare for possible write from native code if mutable.
             if info.mutbl.is_mut() {
                 let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
-                alloc.process_native_write(&cx.tcx, None);
+                // These writes could initialize everything and wreak havoc with the pointers.
+                // We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
+                if !tracing {
+                    alloc.process_native_write(&cx.tcx, None);
+                }
+
                 // Also expose *mutable* provenance for the interpreter-level allocation.
                 std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
             }
@@ -265,10 +378,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let (ret, maybe_memevents) =
             this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
 
-        if cfg!(target_os = "linux")
-            && let Some(events) = maybe_memevents
-        {
-            trace!("Registered FFI events:\n{events:#0x?}");
+        if tracing {
+            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
         }
 
         this.write_immediate(*ret, dest)?;
diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs
index 4961e875c7751..de26cb0fe5570 100644
--- a/src/tools/miri/src/shims/native_lib/trace/child.rs
+++ b/src/tools/miri/src/shims/native_lib/trace/child.rs
@@ -4,12 +4,22 @@ use std::rc::Rc;
 use ipc_channel::ipc;
 use nix::sys::{ptrace, signal};
 use nix::unistd;
+use rustc_const_eval::interpret::InterpResult;
 
 use super::CALLBACK_STACK_SIZE;
-use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest};
+use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
 use super::parent::{ChildListener, sv_loop};
 use crate::alloc::isolated_alloc::IsolatedAlloc;
+use crate::shims::native_lib::MemEvents;
 
+/// A handle to the single, shared supervisor process across all `MiriMachine`s.
+/// Since it would be very difficult to trace multiple FFI calls in parallel, we
+/// need to ensure that either (a) only one `MiriMachine` is performing an FFI call
+/// at any given time, or (b) there are distinct supervisor and child processes for
+/// each machine. The former was chosen here.
+///
+/// This should only contain a `None` if the supervisor has not (yet) been initialised;
+/// otherwise, if `init_sv` was called and did not error, this will always be nonempty.
 static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None);
 
 /// The main means of communication between the child and parent process,
@@ -34,32 +44,23 @@ impl Supervisor {
         SUPERVISOR.lock().unwrap().is_some()
     }
 
-    /// Begins preparations for doing an FFI call. This should be called at
-    /// the last possible moment before entering said call. `alloc` points to
-    /// the allocator which handed out the memory used for this machine.
-    ///
+    /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
     /// As this locks the supervisor via a mutex, no other threads may enter FFI
-    /// until this one returns and its guard is dropped via `end_ffi`. The
-    /// pointer returned should be passed to `end_ffi` to avoid a memory leak.
-    ///
-    /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately
-    /// after the desired call has concluded.
-    pub unsafe fn start_ffi(
+    /// until this function returns.
+    pub fn do_ffi<'tcx>(
         alloc: &Rc<RefCell<IsolatedAlloc>>,
-    ) -> (std::sync::MutexGuard<'static, Option<Supervisor>>, Option<*mut [u8; CALLBACK_STACK_SIZE]>)
-    {
+        f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
+    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
         let mut sv_guard = SUPERVISOR.lock().unwrap();
-        // If the supervisor is not initialised for whatever reason, fast-fail.
-        // This might be desired behaviour, as even on platforms where ptracing
-        // is not implemented it enables us to enforce that only one FFI call
+        // If the supervisor is not initialised for whatever reason, fast-return.
+        // As a side-effect, even on platforms where ptracing
+        // is not implemented, we enforce that only one FFI call
         // happens at a time.
-        let Some(sv) = sv_guard.take() else {
-            return (sv_guard, None);
-        };
+        let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) };
 
         // Get pointers to all the pages the supervisor must allow accesses in
         // and prepare the callback stack.
-        let page_ptrs = alloc.borrow().pages();
+        let page_ptrs = alloc.borrow().pages().collect();
         let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] =
             Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast();
         let stack_ptr = raw_stack_ptr.expose_provenance();
@@ -68,9 +69,9 @@ impl Supervisor {
         // SAFETY: We do not access machine memory past this point until the
         // supervisor is ready to allow it.
         unsafe {
-            if alloc.borrow_mut().prepare_ffi().is_err() {
+            if alloc.borrow_mut().start_ffi().is_err() {
                 // Don't mess up unwinding by maybe leaving the memory partly protected
-                alloc.borrow_mut().unprep_ffi();
+                alloc.borrow_mut().end_ffi();
                 panic!("Cannot protect memory for FFI call!");
             }
         }
@@ -82,27 +83,13 @@ impl Supervisor {
         // enforce an ordering for these events.
         sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
         sv.confirm_rx.recv().unwrap();
-        *sv_guard = Some(sv);
         // We need to be stopped for the supervisor to be able to make certain
         // modifications to our memory - simply waiting on the recv() doesn't
         // count.
         signal::raise(signal::SIGSTOP).unwrap();
-        (sv_guard, Some(raw_stack_ptr))
-    }
 
-    /// Undoes FFI-related preparations, allowing Miri to continue as normal, then
-    /// gets the memory accesses and changes performed during the FFI call. Note
-    /// that this may include some spurious accesses done by `libffi` itself in
-    /// the process of executing the function call.
-    ///
-    /// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones
-    /// received by a prior call to `start_ffi`, and the allocator must be the
-    /// one passed to it also.
-    pub unsafe fn end_ffi(
-        alloc: &Rc<RefCell<IsolatedAlloc>>,
-        mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
-        raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>,
-    ) -> Option<MemEvents> {
+        let res = f();
+
         // We can't use IPC channels here to signal that FFI mode has ended,
         // since they might allocate memory which could get us stuck in a SIGTRAP
         // with no easy way out! While this could be worked around, it is much
@@ -113,42 +100,40 @@ impl Supervisor {
         signal::raise(signal::SIGUSR1).unwrap();
 
         // This is safe! It just sets memory to normal expected permissions.
-        alloc.borrow_mut().unprep_ffi();
+        alloc.borrow_mut().end_ffi();
 
-        // If this is `None`, then `raw_stack_ptr` is None and does not need to
-        // be deallocated (and there's no need to worry about the guard, since
-        // it contains nothing).
-        let sv = sv_guard.take()?;
         // SAFETY: Caller upholds that this pointer was allocated as a box with
         // this type.
         unsafe {
-            drop(Box::from_raw(raw_stack_ptr.unwrap()));
+            drop(Box::from_raw(raw_stack_ptr));
         }
         // On the off-chance something really weird happens, don't block forever.
-        let ret = sv
+        let events = sv
            .event_rx
            .try_recv_timeout(std::time::Duration::from_secs(5))
            .map_err(|e| {
                match e {
                    ipc::TryRecvError::IpcError(_) => (),
                    ipc::TryRecvError::Empty =>
-                        eprintln!("Waiting for accesses from supervisor timed out!"),
+                        panic!("Waiting for accesses from supervisor timed out!"),
                }
            })
            .ok();
-        // Do *not* leave the supervisor empty, or else we might get another fork...
-        *sv_guard = Some(sv);
-        ret
+
+        res.map(|v| (v, events))
     }
 }
 
 /// Initialises the supervisor process. If this function errors, then the
 /// supervisor process could not be created successfully; else, the caller
-/// is now the child process and can communicate via `start_ffi`/`end_ffi`,
-/// receiving back events through `get_events`.
+/// is now the child process and can communicate via `do_ffi`, receiving back
+/// events at the end.
 ///
 /// # Safety
-/// The invariants for `fork()` must be upheld by the caller.
+/// The invariants for `fork()` must be upheld by the caller, namely either:
+/// - Other threads do not exist, or;
+/// - If they do exist, either those threads or the resulting child process
+///   only ever act in [async-signal-safe](https://www.man7.org/linux/man-pages/man7/signal-safety.7.html) ways.
 pub unsafe fn init_sv() -> Result<(), SvInitError> {
     // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
     // relevant missing bits.
@@ -191,8 +176,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
             // The child process is free to unwind, so we won't, to avoid doubly freeing
             // system resources.
             let init = std::panic::catch_unwind(|| {
-                let listener =
-                    ChildListener { message_rx, attached: false, override_retcode: None };
+                let listener = ChildListener::new(message_rx, confirm_tx.clone());
                 // Trace as many things as possible, to be able to handle them as needed.
                 let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
                     | ptrace::Options::PTRACE_O_TRACECLONE
@@ -218,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
                 // The "Ok" case means that we couldn't ptrace.
                 Ok(e) => return Err(e),
                 Err(p) => {
-                    eprintln!("Supervisor process panicked!\n{p:?}");
+                    eprintln!(
+                        "Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."
+                    );
                     std::process::exit(1);
                 }
             }
@@ -239,13 +225,11 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
 }
 
 /// Instruct the supervisor process to return a particular code. Useful if for
-/// whatever reason this code fails to be intercepted normally. In the case of
-/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously
-/// given as a 0 if this is not used.
+/// whatever reason this code fails to be intercepted normally.
 pub fn register_retcode_sv(code: i32) {
     let mut sv_guard = SUPERVISOR.lock().unwrap();
-    if let Some(sv) = sv_guard.take() {
+    if let Some(sv) = sv_guard.as_mut() {
         sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap();
-        *sv_guard = Some(sv);
+        sv.confirm_rx.recv().unwrap();
     }
 }
diff --git a/src/tools/miri/src/shims/native_lib/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs
index 8a83dab5c09d1..1f9df556b577f 100644
--- a/src/tools/miri/src/shims/native_lib/trace/messages.rs
+++ b/src/tools/miri/src/shims/native_lib/trace/messages.rs
@@ -1,25 +1,28 @@
 //! Houses the types that are directly sent across the IPC channels.
 //!
-//! The overall structure of a traced FFI call, from the child process's POV, is
-//! as follows:
+//! When forking to initialise the supervisor during `init_sv`, the child raises
+//! a `SIGSTOP`; if the parent successfully ptraces the child, it will allow it
+//! to resume. Else, the child will be killed by the parent.
+//!
+//! After initialisation is done, the overall structure of a traced FFI call from
+//! the child process's POV is as follows:
 //! ```
 //! message_tx.send(TraceRequest::StartFfi);
-//! confirm_rx.recv();
+//! confirm_rx.recv(); // receives a `Confirmation`
 //! raise(SIGSTOP);
 //! /* do ffi call */
raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi" -//! let events = event_rx.recv(); +//! let events = event_rx.recv(); // receives a `MemEvents` //! ``` //! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including -//! before or after all of them. +//! before or after all of them. `confirm_rx.recv()` is to be called after, to ensure +//! that the child does not exit before the supervisor has registered the return code. //! //! NB: sending these events out of order, skipping steps, etc. will result in //! unspecified behaviour from the supervisor process, so use the abstractions -//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is +//! in `super::child` (namely `do_ffi()`) to handle this. It is //! trivially easy to cause a deadlock or crash by messing this up! -use std::ops::Range; - /// An IPC request sent by the child process to the parent. /// /// The sender for this channel should live on the child process. @@ -34,6 +37,8 @@ pub enum TraceRequest { StartFfi(StartFfiInfo), /// Manually overrides the code that the supervisor will return upon exiting. /// Once set, it is permanent. This can be called again to change the value. + /// + /// After sending this, the child must wait to receive a `Confirmation`. OverrideRetcode(i32), } @@ -41,7 +46,7 @@ pub enum TraceRequest { #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] pub struct StartFfiInfo { /// A vector of page addresses. These should have been automatically obtained - /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. + /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::start_ffi`. pub page_ptrs: Vec<usize>, /// The address of an allocation that can serve as a temporary stack. /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. @@ -54,27 +59,3 @@ pub struct StartFfiInfo { /// The sender for this channel should live on the parent process. #[derive(serde::Serialize, serde::Deserialize, Debug)] pub struct Confirmation;
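The `OverrideRetcode`/`Confirmation` exchange documented above is a plain request/acknowledge handshake: the supervisor records the override *before* confirming, so the child cannot exit while the request is still in flight. The same shape reduced to `std::sync::mpsc` channels (a generic illustration; Miri's actual channels come from `ipc-channel`):

```rust
use std::sync::mpsc;
use std::thread;

fn main() {
    let (message_tx, message_rx) = mpsc::channel::<i32>(); // plays "OverrideRetcode"
    let (confirm_tx, confirm_rx) = mpsc::channel::<()>(); // plays "Confirmation"

    let supervisor = thread::spawn(move || {
        let code = message_rx.recv().unwrap();
        // Record the override first, *then* confirm, so the requester
        // cannot exit before the new return code is in place.
        confirm_tx.send(()).unwrap();
        code
    });

    message_tx.send(42).unwrap();
    confirm_rx.recv().unwrap(); // only now is it safe to exit
    assert_eq!(supervisor.join().unwrap(), 42);
}
```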
- -/// The final results of an FFI trace, containing every relevant event detected -/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal. -/// -/// The sender for this channel should live on the parent process. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub struct MemEvents { - /// An ordered list of memory accesses that occurred. These should be assumed - /// to be overcautious; that is, if the size of an access is uncertain it is - /// pessimistically rounded up, and if the type (read/write/both) is uncertain - /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write - /// becomes a read + write, etc. - pub acc_events: Vec<AccessEvent>, -} - -/// A single memory access, conservatively overestimated -/// in case of ambiguity. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub enum AccessEvent { - /// A read may have occurred on no more than the specified address range. - Read(Range<usize>), - /// A write may have occurred on no more than the specified address range. - Write(Range<usize>), -} diff --git a/src/tools/miri/src/shims/native_lib/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs index 174b06b3ac56c..c8abacfb5e222 100644 --- a/src/tools/miri/src/shims/native_lib/trace/mod.rs +++ b/src/tools/miri/src/shims/native_lib/trace/mod.rs @@ -5,4 +5,6 @@ mod parent; pub use self::child::{Supervisor, init_sv, register_retcode_sv}; /// The size of the temporary stack we use for callbacks that the server executes in the client. +/// This should be big enough that `mempr_on` and `mempr_off` can safely be jumped into with the +/// stack pointer pointing to a "stack" of this size without overflowing it. const CALLBACK_STACK_SIZE: usize = 1024; diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index dfb0b35da699c..83f6c7a13fcfe 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -5,26 +5,17 @@ use nix::sys::{ptrace, signal, wait}; use nix::unistd; use super::CALLBACK_STACK_SIZE; -use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest}; +use super::messages::{Confirmation, StartFfiInfo, TraceRequest}; +use crate::shims::native_lib::{AccessEvent, AccessRange, MemEvents}; /// The flags to use when calling `waitid()`. -/// Since bitwise or on the nix version of these flags is implemented as a trait, -/// this cannot be const directly so we do it this way. const WAIT_FLAGS: wait::WaitPidFlag = - wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); - -/// Arch-specific maximum size a single access might perform. x86 value is set -/// assuming nothing bigger than AVX-512 is available. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -const ARCH_MAX_ACCESS_SIZE: usize = 64; -/// The largest arm64 simd instruction operates on 16 bytes. -#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] -const ARCH_MAX_ACCESS_SIZE: usize = 16; + wait::WaitPidFlag::WUNTRACED.union(wait::WaitPidFlag::WEXITED); /// The default word size on a given platform, in bytes. -#[cfg(any(target_arch = "x86", target_arch = "arm"))] +#[cfg(target_arch = "x86")] const ARCH_WORD_SIZE: usize = 4; -#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +#[cfg(target_arch = "x86_64")] const ARCH_WORD_SIZE: usize = 8; /// The address of the page set to be edited, initialised to a sentinel null /// pointer. @@ -53,39 +44,25 @@ trait ArchIndependentRegs { // It's fine / desirable behaviour for values to wrap here, we care about just // preserving the bit pattern.
#[cfg(target_arch = "x86_64")] -#[expect(clippy::as_conversions)] #[rustfmt::skip] impl ArchIndependentRegs for libc::user_regs_struct { #[inline] - fn ip(&self) -> usize { self.rip as _ } + fn ip(&self) -> usize { self.rip.try_into().unwrap() } #[inline] - fn set_ip(&mut self, ip: usize) { self.rip = ip as _ } + fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() } #[inline] - fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ } + fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() } } #[cfg(target_arch = "x86")] -#[expect(clippy::as_conversions)] #[rustfmt::skip] impl ArchIndependentRegs for libc::user_regs_struct { #[inline] - fn ip(&self) -> usize { self.eip as _ } + fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() } #[inline] - fn set_ip(&mut self, ip: usize) { self.eip = ip as _ } + fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() } #[inline] - fn set_sp(&mut self, sp: usize) { self.esp = sp as _ } -} - -#[cfg(target_arch = "aarch64")] -#[expect(clippy::as_conversions)] -#[rustfmt::skip] -impl ArchIndependentRegs for libc::user_regs_struct { - #[inline] - fn ip(&self) -> usize { self.pc as _ } - #[inline] - fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } - #[inline] - fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } + fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() } } /// A unified event representing something happening on the child process. Wraps @@ -109,11 +86,24 @@ pub enum ExecEvent { /// A listener for the FFI start info channel along with relevant state. pub struct ChildListener { /// The matching channel for the child's `Supervisor` struct. - pub message_rx: ipc::IpcReceiver, + message_rx: ipc::IpcReceiver, + /// ... + confirm_tx: ipc::IpcSender, /// Whether an FFI call is currently ongoing. - pub attached: bool, + attached: bool, /// If `Some`, overrides the return code with the given value. - pub override_retcode: Option, + override_retcode: Option, + /// Last code obtained from a child exiting. + last_code: Option, +} + +impl ChildListener { + pub fn new( + message_rx: ipc::IpcReceiver, + confirm_tx: ipc::IpcSender, + ) -> Self { + Self { message_rx, confirm_tx, attached: false, override_retcode: None, last_code: None } + } } impl Iterator for ChildListener { @@ -133,16 +123,10 @@ impl Iterator for ChildListener { Ok(stat) => match stat { // Child exited normally with a specific code set. - wait::WaitStatus::Exited(_, code) => { - let code = self.override_retcode.unwrap_or(code); - return Some(ExecEvent::Died(Some(code))); - } + wait::WaitStatus::Exited(_, code) => self.last_code = Some(code), // Child was killed by a signal, without giving a code. - wait::WaitStatus::Signaled(_, _, _) => - return Some(ExecEvent::Died(self.override_retcode)), - // Child entered a syscall. Since we're always technically - // tracing, only pass this along if we're actively - // monitoring the child. + wait::WaitStatus::Signaled(_, _, _) => self.last_code = None, + // Child entered or exited a syscall. wait::WaitStatus::PtraceSyscall(pid) => if self.attached { return Some(ExecEvent::Syscall(pid)); @@ -179,10 +163,8 @@ impl Iterator for ChildListener { }, _ => (), }, - // This case should only trigger if all children died and we - // somehow missed that, but it's best we not allow any room - // for deadlocks. - Err(_) => return Some(ExecEvent::Died(None)), + // This case should only trigger when all children died. 
impl Iterator for ChildListener { @@ -133,16 +123,10 @@ impl Iterator for ChildListener { Ok(stat) => match stat { // Child exited normally with a specific code set. - wait::WaitStatus::Exited(_, code) => { - let code = self.override_retcode.unwrap_or(code); - return Some(ExecEvent::Died(Some(code))); - } + wait::WaitStatus::Exited(_, code) => self.last_code = Some(code), // Child was killed by a signal, without giving a code. - wait::WaitStatus::Signaled(_, _, _) => - return Some(ExecEvent::Died(self.override_retcode)), - // Child entered a syscall. Since we're always technically - // tracing, only pass this along if we're actively - // monitoring the child. + wait::WaitStatus::Signaled(_, _, _) => self.last_code = None, + // Child entered or exited a syscall. wait::WaitStatus::PtraceSyscall(pid) => if self.attached { return Some(ExecEvent::Syscall(pid)); @@ -179,10 +163,8 @@ impl Iterator for ChildListener { }, _ => (), }, - // This case should only trigger if all children died and we - // somehow missed that, but it's best we not allow any room - // for deadlocks. - Err(_) => return Some(ExecEvent::Died(None)), + // This case should only trigger when all children died. + Err(_) => return Some(ExecEvent::Died(self.override_retcode.or(self.last_code))), } // Similarly, do a non-blocking poll of the IPC channel. @@ -196,7 +178,10 @@ impl Iterator for ChildListener { self.attached = true; return Some(ExecEvent::Start(info)); }, - TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code), + TraceRequest::OverrideRetcode(code) => { + self.override_retcode = Some(code); + self.confirm_tx.send(Confirmation).unwrap(); + } } } @@ -211,6 +196,12 @@ impl Iterator for ChildListener { #[derive(Debug)] pub struct ExecEnd(pub Option<i32>); +/// Whether to call `ptrace::cont()` immediately. Used exclusively by `wait_for_signal`. +enum InitialCont { + Yes, + No, +} + /// This is the main loop of the supervisor process. It runs in a separate /// process from the rest of Miri (but because we fork, addresses for anything /// created before the fork - like statics - are the same). @@ -239,12 +230,12 @@ pub fn sv_loop( let mut curr_pid = init_pid; // There's an initial sigstop we need to deal with. - wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?; + wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?; ptrace::cont(curr_pid, None).unwrap(); for evt in listener { match evt { - // start_ffi was called by the child, so prep memory. + // Child started ffi, so prep memory. ExecEvent::Start(ch_info) => { // All the pages that the child process is "allowed to" access. ch_pages = ch_info.page_ptrs; @@ -252,17 +243,17 @@ pub fn sv_loop( ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, - // so we can let the child move on to the end of start_ffi where it will + // so we can let the child move on to the end of the ffi prep where it will // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); // We can't trust simply calling `Pid::this()` in the child process to give the right // PID for us, so we get it this way. - curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); + curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap(); ptrace::syscall(curr_pid, None).unwrap(); } - // end_ffi was called by the child. + // Child wants to end tracing. ExecEvent::End => { // Hand over the access info we traced. event_tx.send(MemEvents { acc_events }).unwrap(); @@ -322,10 +313,6 @@ fn get_disasm() -> capstone::Capstone { {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)} #[cfg(target_arch = "x86")] {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)} - #[cfg(target_arch = "aarch64")] - {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)} - #[cfg(target_arch = "arm")] - {cs_pre.arm().mode(arch::arm::ArchMode::Arm)} } .detail(true) .build() @@ -339,9 +326,9 @@ fn get_disasm() -> capstone::Capstone { fn wait_for_signal( pid: Option<unistd::Pid>, wait_signal: signal::Signal, - init_cont: bool, + init_cont: InitialCont, ) -> Result<unistd::Pid, ExecEnd> { - if init_cont { + if matches!(init_cont, InitialCont::Yes) { ptrace::cont(pid.unwrap(), None).unwrap(); } // Repeatedly call `waitid` until we get the signal we want, or the process dies. @@ -374,6 +361,84 @@ fn wait_for_signal( } }
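Swapping `wait_for_signal`'s old `init_cont: bool` parameter for the two-variant `InitialCont` enum costs nothing at runtime but makes call sites self-describing. The same pattern in a schematic, self-contained form (function and names are illustrative, not Miri's):

```rust
enum InitialCont {
    Yes,
    No,
}

fn wait_for_event(name: &str, init_cont: InitialCont) {
    if matches!(init_cont, InitialCont::Yes) {
        println!("continuing before waiting for {name}");
    }
    // ... block until the event arrives ...
}

fn main() {
    // Compared to `wait_for_event("SIGSTOP", true)`, the intent is visible
    // at the call site without consulting the signature.
    wait_for_event("SIGSTOP", InitialCont::Yes);
    wait_for_event("SIGSTOP", InitialCont::No);
}
```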
+/// Add the memory events from `op` being executed while there is a memory access at `addr` to +/// `acc_events`. Return whether this was a memory operand. +fn capstone_find_events( + addr: usize, + op: &capstone::arch::ArchOperand, + acc_events: &mut Vec<AccessEvent>, +) -> bool { + use capstone::prelude::*; + match op { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + arch::ArchOperand::X86Operand(x86_operand) => { + match x86_operand.op_type { + // We only care about memory accesses + arch::x86::X86OperandType::Mem(_) => { + let push = AccessRange { addr, size: x86_operand.size.into() }; + // It's called a "RegAccessType" but it also applies to memory + let acc_ty = x86_operand.access.unwrap(); + // The same instruction might do both reads and writes, so potentially add both. + // We do not know the order in which they happened, but writing and then reading + // makes little sense so we put the read first. That is also the more + // conservative choice. + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + // FIXME: This could be made certain; either determine all cases where + // only reads happen, or have an intermediate mempr_* function to first + // map the page(s) as readonly and check if a segfault occurred. + + // Per https://docs.rs/iced-x86/latest/iced_x86/enum.OpAccess.html, + // we know that the possible access types are Read, CondRead, Write, + // CondWrite, ReadWrite, and ReadCondWrite. Since we got a segfault + // we know some kind of access happened so Cond{Read, Write}s are + // certain reads and writes; the only uncertainty is with an RW op + // as it might be a ReadCondWrite with the write condition unmet. + acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable())); + } + + return true; + } + _ => (), + } + } + // FIXME: arm64 + _ => unimplemented!(), + } + + false +} + +/// Extract the events from the given instruction. +fn capstone_disassemble( + instr: &[u8], + addr: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec<AccessEvent>, +) -> capstone::CsResult<()> { + // The arch_detail is what we care about, but it relies on these temporaries + // that we can't drop. 0x1000 is the default base address for Capstone, and + // we're expecting 1 instruction. + let insns = cs.disasm_count(instr, 0x1000, 1)?; + let ins_detail = cs.insn_detail(&insns[0])?; + let arch_detail = ins_detail.arch_detail(); + + let mut found_mem_op = false; + + for op in arch_detail.operands() { + if capstone_find_events(addr, &op, acc_events) { + if found_mem_op { + panic!("more than one memory operand found; we don't know which one accessed what"); + } + found_mem_op = true; + } + } + + Ok(()) +}
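The capstone calls used by `capstone_disassemble` above (`disasm_count`, `insn_detail`, `arch_detail`, `operands`) can be exercised standalone. A minimal sketch that inspects the memory operand of one hard-coded x86-64 instruction (the instruction bytes are chosen here purely for illustration):

```rust
use capstone::prelude::*;

fn main() -> Result<(), capstone::Error> {
    let cs = Capstone::new()
        .x86()
        .mode(arch::x86::ArchMode::Mode64)
        .detail(true) // required, or `insn_detail` will fail
        .build()?;
    // 48 8b 07 encodes `mov rax, qword ptr [rdi]`.
    let insns = cs.disasm_count(&[0x48, 0x8b, 0x07], 0x1000, 1)?;
    let detail = cs.insn_detail(&insns[0])?;
    for op in detail.arch_detail().operands() {
        if let arch::ArchOperand::X86Operand(x86_op) = op {
            if let arch::x86::X86OperandType::Mem(_) = x86_op.op_type {
                // For this instruction: an 8-byte, read-only memory operand.
                println!("memory operand of {} bytes", x86_op.size);
            }
        }
    }
    Ok(())
}
```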
+ /// Grabs the access that caused a segfault and logs it down if it's to our memory, /// or kills the child and returns the appropriate error otherwise. fn handle_segfault( @@ -384,116 +449,10 @@ fn handle_segfault( cs: &capstone::Capstone, acc_events: &mut Vec<AccessEvent>, ) -> Result<(), ExecEnd> { - /// This is just here to not pollute the main namespace with `capstone::prelude::*`. - #[inline] - fn capstone_disassemble( - instr: &[u8], - addr: usize, - cs: &capstone::Capstone, - acc_events: &mut Vec<AccessEvent>, - ) -> capstone::CsResult<()> { - use capstone::prelude::*; - - // The arch_detail is what we care about, but it relies on these temporaries - // that we can't drop. 0x1000 is the default base address for Captsone, and - // we're expecting 1 instruction. - let insns = cs.disasm_count(instr, 0x1000, 1)?; - let ins_detail = cs.insn_detail(&insns[0])?; - let arch_detail = ins_detail.arch_detail(); - - for op in arch_detail.operands() { - match op { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - arch::ArchOperand::X86Operand(x86_operand) => { - match x86_operand.op_type { - // We only care about memory accesses - arch::x86::X86OperandType::Mem(_) => { - let push = addr..addr.strict_add(usize::from(x86_operand.size)); - // It's called a "RegAccessType" but it also applies to memory - let acc_ty = x86_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - } - } - #[cfg(target_arch = "aarch64")] - arch::ArchOperand::Arm64Operand(arm64_operand) => { - // Annoyingly, we don't always get the size here, so just be pessimistic for now. - match arm64_operand.op_type { - arch::arm64::Arm64OperandType::Mem(_) => { - // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes. - let size = match arm64_operand.vas { - // Not an fp/simd instruction. - arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, - // 1 byte. - arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, - // 2 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, - // 4 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_4B - | arch::arm64::Arm64Vas::ARM64_VAS_2H - | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, - // 8 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_8B - | arch::arm64::Arm64Vas::ARM64_VAS_4H - | arch::arm64::Arm64Vas::ARM64_VAS_2S - | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, - // 16 bytes. - arch::arm64::Arm64Vas::ARM64_VAS_16B - | arch::arm64::Arm64Vas::ARM64_VAS_8H - | arch::arm64::Arm64Vas::ARM64_VAS_4S - | arch::arm64::Arm64Vas::ARM64_VAS_2D - | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16, - }; - let push = addr..addr.strict_add(size); - // FIXME: This now has access type info in the latest - // git version of capstone because this pissed me off - // and I added it. Change this when it updates. - acc_events.push(AccessEvent::Read(push.clone())); - acc_events.push(AccessEvent::Write(push)); - } - _ => (), - } - } - #[cfg(target_arch = "arm")] - arch::ArchOperand::ArmOperand(arm_operand) => - match arm_operand.op_type { - arch::arm::ArmOperandType::Mem(_) => { - // We don't get info on the size of the access, but - // we're at least told if it's a vector instruction. - let size = if arm_operand.vector_index.is_some() { - ARCH_MAX_ACCESS_SIZE - } else { - ARCH_WORD_SIZE - }; - let push = addr..addr.strict_add(size); - let acc_ty = arm_operand.access.unwrap(); - if acc_ty.is_readable() { - acc_events.push(AccessEvent::Read(push.clone())); - } - if acc_ty.is_writable() { - acc_events.push(AccessEvent::Write(push)); - } - } - _ => (), - }, - _ => unimplemented!(), - } - } - - Ok(()) - } - // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); - // All x86, ARM, etc. instructions only have at most one memory operand - // (thankfully!) + // All x86 instructions only have at most one memory operand (thankfully!) // SAFETY: si_addr is safe to call. let addr = unsafe { siginfo.si_addr().addr() }; let page_addr = addr.strict_sub(addr.strict_rem(page_size)); @@ -515,7 +474,7 @@ fn handle_segfault( // global atomic variables. This is what we use the temporary callback stack for.
// - Step 1 instruction // - Parse executed code to estimate size & type of access - // - Reprotect the memory by executing `mempr_on` in the child. + // - Reprotect the memory by executing `mempr_on` in the child, using the callback stack again. // - Continue // Ensure the stack is properly zeroed out! @@ -540,7 +499,7 @@ fn handle_segfault( ptrace::write( pid, (&raw const PAGE_ADDR).cast_mut().cast(), - libc::c_long::try_from(page_addr).unwrap(), + libc::c_long::try_from(page_addr.cast_signed()).unwrap(), ) .unwrap(); @@ -552,7 +511,7 @@ fn handle_segfault( ptrace::setregs(pid, new_regs).unwrap(); // Our mempr_* functions end with a raise(SIGSTOP). - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; // Step 1 instruction. ptrace::setregs(pid, regs_bak).unwrap(); @@ -573,6 +532,12 @@ fn handle_segfault( let regs_bak = ptrace::getregs(pid).unwrap(); new_regs = regs_bak; let ip_poststep = regs_bak.ip(); + + // Ensure that we've actually gone forwards. + assert!(ip_poststep > ip_prestep); + // But not by too much. 64 bytes should be "big enough" on ~any architecture. + assert!(ip_prestep.strict_add(64) > ip_poststep); + // We need to do reads/writes in word-sized chunks. let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { @@ -587,20 +552,14 @@ fn handle_segfault( }); // Now figure out the size + type of access and log it down. - // This will mark down e.g. the same area being read multiple times, - // since it's more efficient to compress the accesses at the end. - if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { - // Read goes first because we need to be pessimistic. - acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - } + capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction"); // Reprotect everything and continue. #[expect(clippy::as_conversions)] new_regs.set_ip(mempr_on as usize); new_regs.set_sp(stack_ptr); ptrace::setregs(pid, new_regs).unwrap(); - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; ptrace::setregs(pid, regs_bak).unwrap(); ptrace::syscall(pid, None).unwrap(); diff --git a/src/tools/miri/src/shims/native_lib/trace/stub.rs b/src/tools/miri/src/shims/native_lib/trace/stub.rs new file mode 100644 index 0000000000000..22787a6f6fa78 --- /dev/null +++ b/src/tools/miri/src/shims/native_lib/trace/stub.rs @@ -0,0 +1,34 @@ +use rustc_const_eval::interpret::InterpResult; + +static SUPERVISOR: std::sync::Mutex<()> = std::sync::Mutex::new(()); + +pub struct Supervisor; + +#[derive(Debug)] +pub struct SvInitError; +
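This stub replaces the Linux-only tracer on other targets, but its `do_ffi` (just below) still takes a global lock so that native calls stay serialized even with tracing unavailable. Stripped to its essentials (a sketch with illustrative names, not Miri's types):

```rust
use std::sync::Mutex;

static FFI_LOCK: Mutex<()> = Mutex::new(());

fn call_native<T>(f: impl FnOnce() -> T) -> T {
    // Even without tracing, at most one native call may run at a time.
    let _guard = FFI_LOCK.lock().unwrap();
    f()
}

fn main() {
    assert_eq!(call_native(|| 2 + 2), 4);
}
```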
+impl Supervisor { + #[inline(always)] + pub fn is_enabled() -> bool { + false + } + + pub fn do_ffi<'tcx, T>( + _: T, + f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>, + ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> { + // We acquire the lock to ensure that no two FFI calls run concurrently. + let _g = SUPERVISOR.lock().unwrap(); + f().map(|v| (v, None)) + } +} + +#[inline(always)] +#[allow(dead_code, clippy::missing_safety_doc)] +pub unsafe fn init_sv() -> Result<(), SvInitError> { + Err(SvInitError) +} + +#[inline(always)] +#[allow(dead_code)] +pub fn register_retcode_sv<T>(_: T) {} diff --git a/src/tools/miri/src/shims/panic.rs b/src/tools/miri/src/shims/panic.rs index 9490761d0c9bb..85564e47685f2 100644 --- a/src/tools/miri/src/shims/panic.rs +++ b/src/tools/miri/src/shims/panic.rs @@ -1,162 +1,12 @@ -//! Panic runtime for Miri. -//! -//! The core pieces of the runtime are: -//! - An implementation of `__rust_maybe_catch_panic` that pushes the invoked stack frame with -//! some extra metadata derived from the panic-catching arguments of `__rust_maybe_catch_panic`. -//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the -//! target-native panic runtime. (This lives in the rustc repo.) -//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and then -//! immediately returns, but on the *unwind* edge (not the normal return edge), thus initiating unwinding. -//! - A hook executed each time a frame is popped, such that if the frame pushed by `__rust_maybe_catch_panic` -//! gets popped *during unwinding*, we take the panic payload and store it according to the extra -//! metadata we remembered when pushing said frame. +//! Helper functions for causing panics. use rustc_abi::ExternAbi; use rustc_middle::{mir, ty}; -use rustc_target::spec::PanicStrategy; -use self::helpers::check_intrinsic_arg_count; use crate::*; -/// Holds all of the relevant data for when unwinding hits a `try` frame. -#[derive(Debug)] -pub struct CatchUnwindData<'tcx> { - /// The `catch_fn` callback to call in case of a panic. - catch_fn: Pointer, - /// The `data` argument for that callback. - data: ImmTy<'tcx>, - /// The return place from the original call to `try`. - dest: MPlaceTy<'tcx>, - /// The return block from the original call to `try`. - ret: Option<mir::BasicBlock>, -} - -impl VisitProvenance for CatchUnwindData<'_> { - fn visit_provenance(&self, visit: &mut VisitWith<'_>) { - let CatchUnwindData { catch_fn, data, dest, ret: _ } = self; - catch_fn.visit_provenance(visit); - data.visit_provenance(visit); - dest.visit_provenance(visit); - } -} - impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { - /// Handles the special `miri_start_unwind` intrinsic, which is called - /// by libpanic_unwind to delegate the actual unwinding process to Miri. - fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - - trace!("miri_start_unwind: {:?}", this.frame().instance()); - - let payload = this.read_immediate(payload)?; - let thread = this.active_thread_mut(); - thread.panic_payloads.push(payload); - - interp_ok(()) - } - - /// Handles the `catch_unwind` intrinsic. - fn handle_catch_unwind( - &mut self, - args: &[OpTy<'tcx>], - dest: &MPlaceTy<'tcx>, - ret: Option<mir::BasicBlock>, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - - // Signature: - // fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32 - // Calls `try_fn` with `data` as argument. If that executes normally, returns 0. - // If that unwinds, calls `catch_fn` with the first argument being `data` and - // then second argument being a target-dependent `payload` (i.e. it is up to us to define - // what that is), and returns 1.
- // The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to - // return a `Box<dyn Any + Send>`. - // In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply - // a pointer to `Box<dyn Any + Send>`. - - // Get all the arguments. - let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?; - let try_fn = this.read_pointer(try_fn)?; - let data = this.read_immediate(data)?; - let catch_fn = this.read_pointer(catch_fn)?; - - // Now we make a function call, and pass `data` as first and only argument. - let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?; - trace!("try_fn: {:?}", f_instance); - #[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is - this.call_function( - f_instance, - ExternAbi::Rust, - &[data.clone()], - None, - // Directly return to caller. - ReturnContinuation::Goto { ret, unwind: mir::UnwindAction::Continue }, - )?; - - // We ourselves will return `0`, eventually (will be overwritten if we catch a panic). - this.write_null(dest)?; - - // In unwind mode, we tag this frame with the extra data needed to catch unwinding. - // This lets `handle_stack_pop` (below) know that we should stop unwinding - // when we pop this frame. - if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind { - this.frame_mut().extra.catch_unwind = - Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret }); - } - - interp_ok(()) - } - - fn handle_stack_pop_unwind( - &mut self, - mut extra: FrameExtra<'tcx>, - unwinding: bool, - ) -> InterpResult<'tcx, ReturnAction> { - let this = self.eval_context_mut(); - trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding); - - // We only care about `catch_panic` if we're unwinding - if we're doing a normal - // return, then we don't need to do anything special. - if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) { - // We've just popped a frame that was pushed by `catch_unwind`, - // and we are unwinding, so we should catch that. - trace!( - "unwinding: found catch_panic frame during unwinding: {:?}", - this.frame().instance() - ); - - // We set the return value of `catch_unwind` to 1, since there was a panic. - this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?; - - // The Thread's `panic_payload` holds what was passed to `miri_start_unwind`. - // This is exactly the second argument we need to pass to `catch_fn`. - let payload = this.active_thread_mut().panic_payloads.pop().unwrap(); - - // Push the `catch_fn` stackframe. - let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?; - trace!("catch_fn: {:?}", f_instance); - this.call_function( - f_instance, - ExternAbi::Rust, - &[catch_unwind.data, payload], - None, - // Directly return to caller of `catch_unwind`. - ReturnContinuation::Goto { - ret: catch_unwind.ret, - // `catch_fn` must not unwind. - unwind: mir::UnwindAction::Unreachable, - }, - )?; - - // We pushed a new stack frame, the engine should not do any jumping now! - interp_ok(ReturnAction::NoJump) - } else { - interp_ok(ReturnAction::Normal) - } - } - /// Start a panic in the interpreter with the given message as payload.
fn start_panic(&mut self, msg: &str, unwind: mir::UnwindAction) -> InterpResult<'tcx> { let this = self.eval_context_mut(); diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs index 7182637437a16..1200029692dc3 100644 --- a/src/tools/miri/src/shims/tls.rs +++ b/src/tools/miri/src/shims/tls.rs @@ -302,7 +302,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Windows has a special magic linker section that is run on certain events. // We don't support most of that, but just enough to make thread-local dtors in `std` work. - interp_ok(this.lookup_link_section(".CRT$XLB")?) + interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?) } fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> { diff --git a/src/tools/miri/src/shims/unix/fs.rs b/src/tools/miri/src/shims/unix/fs.rs index 0f7d453b296c9..0f2878ad26c85 100644 --- a/src/tools/miri/src/shims/unix/fs.rs +++ b/src/tools/miri/src/shims/unix/fs.rs @@ -132,12 +132,12 @@ trait EvalContextExtPrivate<'tcx>: crate::MiriInterpCxExt<'tcx> { let buf = this.deref_pointer_as(buf_op, this.libc_ty_layout("stat"))?; this.write_int_fields_named( &[ - ("st_dev", 0), + ("st_dev", metadata.dev.into()), ("st_mode", mode.try_into().unwrap()), ("st_nlink", 0), ("st_ino", 0), - ("st_uid", 0), - ("st_gid", 0), + ("st_uid", metadata.uid.into()), + ("st_gid", metadata.gid.into()), ("st_rdev", 0), ("st_atime", access_sec.into()), ("st_mtime", modified_sec.into()), @@ -1544,6 +1544,9 @@ struct FileMetadata { created: Option<(u64, u32)>, accessed: Option<(u64, u32)>, modified: Option<(u64, u32)>, + dev: u64, + uid: u32, + gid: u32, } impl FileMetadata { @@ -1601,6 +1604,21 @@ impl FileMetadata { let modified = extract_sec_and_nsec(metadata.modified())?; // FIXME: Provide more fields using platform specific methods. - interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified })) + + cfg_select! { + unix => { + use std::os::unix::fs::MetadataExt; + let dev = metadata.dev(); + let uid = metadata.uid(); + let gid = metadata.gid(); + } + _ => { + let dev = 0; + let uid = 0; + let gid = 0; + } + } + + interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified, dev, uid, gid })) } } diff --git a/src/tools/miri/src/shims/unwind.rs b/src/tools/miri/src/shims/unwind.rs new file mode 100644 index 0000000000000..ba0c50b54b47a --- /dev/null +++ b/src/tools/miri/src/shims/unwind.rs @@ -0,0 +1,160 @@ +//! Unwinding runtime for Miri. +//! +//! The core pieces of the runtime are: +//! - An implementation of `catch_unwind` that pushes the invoked stack frame with +//! some extra metadata derived from the panic-catching arguments of `catch_unwind`. +//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the +//! target-native panic runtime. (This lives in the rustc repo.) +//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and +//! then immediately returns, but on the *unwind* edge (not the normal return edge), thus +//! initiating unwinding. +//! - A hook executed each time a frame is popped, such that if the frame pushed by `catch_unwind` +//! gets popped *during unwinding*, we take the panic payload and store it according to the extra +//! metadata we remembered when pushing said frame. 
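For orientation, this runtime implements the machinery behind `std::panic::catch_unwind`. A minimal program whose execution under Miri walks exactly the path sketched in the module comment above (panic, unwind to the pushed frame, run `catch_fn`, surface the payload):

```rust
fn main() {
    let result = std::panic::catch_unwind(|| panic!("boom"));
    // The panic was caught: internally the `catch_unwind` intrinsic returned 1
    // and `catch_fn` stored the payload, surfaced here as `Err`.
    let payload = result.unwrap_err();
    assert_eq!(payload.downcast_ref::<&str>(), Some(&"boom"));
}
```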
+ +use rustc_abi::ExternAbi; +use rustc_middle::mir; +use rustc_target::spec::PanicStrategy; + +use self::helpers::check_intrinsic_arg_count; +use crate::*; + +/// Holds all of the relevant data for when unwinding hits a `try` frame. +#[derive(Debug)] +pub struct CatchUnwindData<'tcx> { + /// The `catch_fn` callback to call in case of a panic. + catch_fn: Pointer, + /// The `data` argument for that callback. + data: ImmTy<'tcx>, + /// The return place from the original call to `try`. + dest: MPlaceTy<'tcx>, + /// The return block from the original call to `try`. + ret: Option<mir::BasicBlock>, +} + +impl VisitProvenance for CatchUnwindData<'_> { + fn visit_provenance(&self, visit: &mut VisitWith<'_>) { + let CatchUnwindData { catch_fn, data, dest, ret: _ } = self; + catch_fn.visit_provenance(visit); + data.visit_provenance(visit); + dest.visit_provenance(visit); + } +} + +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + /// Handles the special `miri_start_unwind` intrinsic, which is called + /// by libpanic_unwind to delegate the actual unwinding process to Miri. + fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + trace!("miri_start_unwind: {:?}", this.frame().instance()); + + let payload = this.read_immediate(payload)?; + let thread = this.active_thread_mut(); + thread.unwind_payloads.push(payload); + + interp_ok(()) + } + + /// Handles the `catch_unwind` intrinsic. + fn handle_catch_unwind( + &mut self, + args: &[OpTy<'tcx>], + dest: &MPlaceTy<'tcx>, + ret: Option<mir::BasicBlock>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + // Signature: + // fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32 + // Calls `try_fn` with `data` as argument. If that executes normally, returns 0. + // If that unwinds, calls `catch_fn` with the first argument being `data` and + // then second argument being a target-dependent `payload` (i.e. it is up to us to define + // what that is), and returns 1. + // The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to + // return a `Box<dyn Any + Send>`. + // In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply + // a pointer to `Box<dyn Any + Send>`. + + // Get all the arguments. + let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?; + let try_fn = this.read_pointer(try_fn)?; + let data = this.read_immediate(data)?; + let catch_fn = this.read_pointer(catch_fn)?; + + // Now we make a function call, and pass `data` as first and only argument. + let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?; + trace!("try_fn: {:?}", f_instance); + #[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is + this.call_function( + f_instance, + ExternAbi::Rust, + &[data.clone()], + None, + // Directly return to caller. + ReturnContinuation::Goto { ret, unwind: mir::UnwindAction::Continue }, + )?; + + // We ourselves will return `0`, eventually (will be overwritten if we catch a panic). + this.write_null(dest)?; + + // In unwind mode, we tag this frame with the extra data needed to catch unwinding. + // This lets `handle_stack_pop` (below) know that we should stop unwinding + // when we pop this frame.
+ if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind { + this.frame_mut().extra.catch_unwind = + Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret }); + } + + interp_ok(()) + } + + fn handle_stack_pop_unwind( + &mut self, + mut extra: FrameExtra<'tcx>, + unwinding: bool, + ) -> InterpResult<'tcx, ReturnAction> { + let this = self.eval_context_mut(); + trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding); + + // We only care about `catch_unwind` if we're unwinding - if we're doing a normal + // return, then we don't need to do anything special. + if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) { + // We've just popped a frame that was pushed by `catch_unwind`, + // and we are unwinding, so we should catch that. + trace!( + "unwinding: found catch_panic frame during unwinding: {:?}", + this.frame().instance() + ); + + // We set the return value of `catch_unwind` to 1, since there was a panic. + this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?; + + // The Thread's `unwind_payloads` holds what was passed to `miri_start_unwind`. + // This is exactly the second argument we need to pass to `catch_fn`. + let payload = this.active_thread_mut().unwind_payloads.pop().unwrap(); + + // Push the `catch_fn` stackframe. + let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?; + trace!("catch_fn: {:?}", f_instance); + this.call_function( + f_instance, + ExternAbi::Rust, + &[catch_unwind.data, payload], + None, + // Directly return to caller of `catch_unwind`. + ReturnContinuation::Goto { + ret: catch_unwind.ret, + // `catch_fn` must not unwind. + unwind: mir::UnwindAction::Unreachable, + }, + )?; + + // We pushed a new stack frame, the engine should not do any jumping now! + interp_ok(ReturnAction::NoJump) + } else { + interp_ok(ReturnAction::Normal) + } + } +} diff --git a/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs new file mode 100644 index 0000000000000..e267f82e215b5 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.rs @@ -0,0 +1,25 @@ +//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@compile-flags: -Zmiri-native-lib-enable-tracing + +extern "C" { + fn init_n(n: i32, ptr: *mut u8); +} + +fn main() { + partial_init(); +} + +// Initialise the first 2 elements of the slice from native code, and check +// that the 3rd is correctly deemed uninit. +fn partial_init() { + let mut slice = std::mem::MaybeUninit::<[u8; 3]>::uninit(); + let slice_ptr = slice.as_mut_ptr().cast::<u8>(); + unsafe { + // Initialize the first two elements. + init_n(2, slice_ptr); + assert!(*slice_ptr == 0); + assert!(*slice_ptr.offset(1) == 0); + // Reading the third is UB!
+ let _val = *slice_ptr.offset(2); //~ ERROR: Undefined Behavior: using uninitialized data + } +} diff --git a/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr new file mode 100644 index 0000000000000..84fd913b5e5b3 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/partial_init.stderr @@ -0,0 +1,39 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | init_n(2, slice_ptr); + | ^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | partial_init(); + | ^^^^^^^^^^^^^^ + +error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | let _val = *slice_ptr.offset(2); + | ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/partial_init.rs:LL:CC + | +LL | partial_init(); + | ^^^^^^^^^^^^^^ + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error; 1 warning emitted + diff --git a/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs new file mode 100644 index 0000000000000..b78c29d98dbb6 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs @@ -0,0 +1,29 @@ +//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@compile-flags: -Zmiri-permissive-provenance -Zmiri-native-lib-enable-tracing + +extern "C" { + fn do_one_deref(ptr: *const *const *const i32) -> usize; +} + +fn main() { + unexposed_reachable_alloc(); +} + +// Expose 2 pointers by virtue of doing a native read and assert that the 3rd in +// the chain remains properly unexposed. +fn unexposed_reachable_alloc() { + let inner = 42; + let intermediate_a = &raw const inner; + let intermediate_b = &raw const intermediate_a; + let exposed = &raw const intermediate_b; + // Discard the return value; it's just there so the access in C doesn't get optimised away. 
+ unsafe { do_one_deref(exposed) }; + // Native read should have exposed the address of intermediate_b... + let valid: *const i32 = std::ptr::with_exposed_provenance(intermediate_b.addr()); + // but not of intermediate_a. + let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate_a.addr()); + unsafe { + let _ok = *valid; + let _not_ok = *invalid; //~ ERROR: Undefined Behavior: memory access failed: attempting to access + } +} diff --git a/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr new file mode 100644 index 0000000000000..2d34dac1b3ff3 --- /dev/null +++ b/src/tools/miri/tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr @@ -0,0 +1,39 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unsafe { do_one_deref(exposed) }; + | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unexposed_reachable_alloc(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: Undefined Behavior: memory access failed: attempting to access 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance) + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | let _not_ok = *invalid; + | ^^^^^^^^ Undefined Behavior occurred here + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = note: BACKTRACE: + = note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC +note: inside `main` + --> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC + | +LL | unexposed_reachable_alloc(); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to 1 previous error; 1 warning emitted + diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stderr similarity index 100% rename from src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr rename to src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stderr diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stdout b/src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stdout similarity index 100% rename from 
src/tools/miri/tests/native-lib/pass/ptr_read_access.stdout rename to src/tools/miri/tests/native-lib/pass/ptr_read_access.notrace.stdout diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs index 4c85213536785..4f3c37f00c1d7 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.rs @@ -1,3 +1,7 @@ +//@revisions: trace notrace +//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing + fn main() { test_access_pointer(); test_access_simple(); diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr new file mode 100644 index 0000000000000..c2a4508b7fcc7 --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stderr @@ -0,0 +1,19 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/pass/ptr_read_access.rs:LL:CC + | +LL | unsafe { print_pointer(&x) }; + | ^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `test_access_pointer` at tests/native-lib/pass/ptr_read_access.rs:LL:CC +note: inside `main` + --> tests/native-lib/pass/ptr_read_access.rs:LL:CC + | +LL | test_access_pointer(); + | ^^^^^^^^^^^^^^^^^^^^^ + diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout new file mode 100644 index 0000000000000..1a8799abfc93e --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.trace.stdout @@ -0,0 +1 @@ +printing pointer dereference from C: 42 diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.notrace.stderr similarity index 100% rename from src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr rename to src/tools/miri/tests/native-lib/pass/ptr_write_access.notrace.stderr diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs b/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs index 86a9c97f4cecc..57def78b0ab17 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.rs @@ -1,3 +1,6 @@ +//@revisions: trace notrace +//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu +//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing //@compile-flags: -Zmiri-permissive-provenance #![feature(box_as_ptr)] diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr new file mode 100644 index 
0000000000000..dbf021b15bec4 --- /dev/null +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.trace.stderr @@ -0,0 +1,19 @@ +warning: sharing memory with a native function called via FFI + --> tests/native-lib/pass/ptr_write_access.rs:LL:CC + | +LL | unsafe { increment_int(&mut x) }; + | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function + | + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory + = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free + = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here + = note: BACKTRACE: + = note: inside `test_increment_int` at tests/native-lib/pass/ptr_write_access.rs:LL:CC +note: inside `main` + --> tests/native-lib/pass/ptr_write_access.rs:LL:CC + | +LL | test_increment_int(); + | ^^^^^^^^^^^^^^^^^^^^ + diff --git a/src/tools/miri/tests/native-lib/ptr_read_access.c b/src/tools/miri/tests/native-lib/ptr_read_access.c index b89126d3d7c88..021eb6adca4f6 100644 --- a/src/tools/miri/tests/native-lib/ptr_read_access.c +++ b/src/tools/miri/tests/native-lib/ptr_read_access.c @@ -49,3 +49,9 @@ typedef struct Static { EXPORT int32_t access_static(const Static *s_ptr) { return s_ptr->recurse->recurse->value; } + +/* Test: unexposed_reachable_alloc */ + +EXPORT uintptr_t do_one_deref(const int32_t ***ptr) { + return (uintptr_t)*ptr; +} diff --git a/src/tools/miri/tests/native-lib/ptr_write_access.c b/src/tools/miri/tests/native-lib/ptr_write_access.c index fd8b005499c76..5260d0b365175 100644 --- a/src/tools/miri/tests/native-lib/ptr_write_access.c +++ b/src/tools/miri/tests/native-lib/ptr_write_access.c @@ -107,3 +107,11 @@ EXPORT void set_shared_mem(int32_t** ptr) { EXPORT void init_ptr_stored_in_shared_mem(int32_t val) { **shared_place = val; } + +/* Test: partial_init */ + +EXPORT void init_n(int32_t n, char* ptr) { + for (int i=0; i<n; i++) { + ptr[i] = 0; + } +} diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency.rs b/src/tools/miri/tests/pass/0weak_memory_consistency.rs --- a/src/tools/miri/tests/pass/0weak_memory_consistency.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency.rs @@ ... @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool { } /// Spins until it acquires a pre-determined value. -fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { +fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { + while loc.load(ord) != val { + std::hint::spin_loop(); + } + val +} + +/// Spins until it acquires a pre-determined boolean. +fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -65,7 +73,7 @@ fn test_corr() { }); // | | #[rustfmt::skip] // |synchronizes-with |happens-before let j3 = spawn(move || { // | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | x.load(Relaxed) // <----------------------------------------------+ // The two reads on x are ordered by hb, so they cannot observe values // differently from the modification order.
If the first read observed @@ -90,12 +98,12 @@ fn test_wrc() { }); // | | #[rustfmt::skip] // |synchronizes-with | let j2 = spawn(move || { // | | - loads_value(&x, Acquire, 1); // <------------+ | + spin_until_i32(&x, Acquire, 1); // <---------+ | y.store(1, Release); // ---------------------+ |happens-before }); // | | #[rustfmt::skip] // |synchronizes-with | let j3 = spawn(move || { // | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | x.load(Relaxed) // <-----------------------------------------------+ }); @@ -121,7 +129,7 @@ fn test_message_passing() { #[rustfmt::skip] // |synchronizes-with | happens-before let j2 = spawn(move || { // | | let x = x; // avoid field capturing | | - loads_value(&y, Acquire, 1); // <------------+ | + spin_until_i32(&y, Acquire, 1); // <---------+ | unsafe { *x.0 } // <---------------------------------------------+ }); @@ -216,12 +224,12 @@ fn test_sync_through_rmw_and_fences() { let go = static_atomic_bool(false); let t1 = spawn(move || { - while !go.load(Relaxed) {} + spin_until_bool(go, Relaxed, true); rdmw(y, x, z) }); let t2 = spawn(move || { - while !go.load(Relaxed) {} + spin_until_bool(go, Relaxed, true); rdmw(z, x, y) }); diff --git a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs index 45cc5e6e72214..937c2a8cf282c 100644 --- a/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs +++ b/src/tools/miri/tests/pass/0weak_memory_consistency_sc.rs @@ -20,7 +20,15 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool { } /// Spins until it acquires a pre-determined value. -fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { +fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { + while loc.load(ord) != val { + std::hint::spin_loop(); + } + val +} + +/// Spins until it acquires a pre-determined boolean. 
+fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool { while loc.load(ord) != val { std::hint::spin_loop(); } @@ -60,11 +68,11 @@ fn test_iriw_sc_rlx() { let a = spawn(move || x.store(true, Relaxed)); let b = spawn(move || y.store(true, Relaxed)); let c = spawn(move || { - while !x.load(SeqCst) {} + spin_until_bool(x, SeqCst, true); y.load(SeqCst) }); let d = spawn(move || { - while !y.load(SeqCst) {} + spin_until_bool(y, SeqCst, true); x.load(SeqCst) }); @@ -136,7 +144,7 @@ fn test_cpp20_rwc_syncs() { }); let j2 = spawn(move || { - loads_value(&x, Relaxed, 1); + spin_until_i32(&x, Relaxed, 1); fence(SeqCst); y.load(Relaxed) }); diff --git a/src/tools/miri/tests/pass/alloc-access-tracking.rs b/src/tools/miri/tests/pass/alloc-access-tracking.rs index 0e88951dc43f9..9eba0ca171bc6 100644 --- a/src/tools/miri/tests/pass/alloc-access-tracking.rs +++ b/src/tools/miri/tests/pass/alloc-access-tracking.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -//@compile-flags: -Zmiri-track-alloc-id=20 -Zmiri-track-alloc-accesses -Cpanic=abort -//@normalize-stderr-test: "id 20" -> "id $$ALLOC" +//@compile-flags: -Zmiri-track-alloc-id=19 -Zmiri-track-alloc-accesses -Cpanic=abort +//@normalize-stderr-test: "id 19" -> "id $$ALLOC" //@only-target: linux # alloc IDs differ between OSes (due to extern static allocations) extern "Rust" { diff --git a/src/tools/miri/tests/pass/shims/ctor.rs b/src/tools/miri/tests/pass/shims/ctor.rs new file mode 100644 index 0000000000000..b997d2386b82c --- /dev/null +++ b/src/tools/miri/tests/pass/shims/ctor.rs @@ -0,0 +1,46 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +static COUNT: AtomicUsize = AtomicUsize::new(0); + +unsafe extern "C" fn ctor() { + COUNT.fetch_add(1, Ordering::Relaxed); +} + +#[rustfmt::skip] +macro_rules! ctor { + ($ident:ident = $ctor:ident) => { + #[cfg_attr( + all(any( + target_os = "linux", + target_os = "android", + target_os = "dragonfly", + target_os = "freebsd", + target_os = "haiku", + target_os = "illumos", + target_os = "netbsd", + target_os = "openbsd", + target_os = "solaris", + target_os = "none", + target_family = "wasm", + )), + link_section = ".init_array" + )] + #[cfg_attr(windows, link_section = ".CRT$XCU")] + #[cfg_attr( + any(target_os = "macos", target_os = "ios"), + // We do not set the `mod_init_funcs` flag here since ctor/inventory also do not do + // that. See . + link_section = "__DATA,__mod_init_func" + )] + #[used] + static $ident: unsafe extern "C" fn() = $ctor; + }; +} + +ctor! { CTOR1 = ctor } +ctor! { CTOR2 = ctor } +ctor! 
{ CTOR3 = ctor } + +fn main() { + assert_eq!(COUNT.load(Ordering::Relaxed), 3, "ctors did not run"); +} diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs index eeab4ebf129e8..199f83f052860 100644 --- a/src/tools/miri/tests/pass/weak_memory/weak.rs +++ b/src/tools/miri/tests/pass/weak_memory/weak.rs @@ -24,7 +24,7 @@ fn static_atomic(val: usize) -> &'static AtomicUsize { } // Spins until it reads the given value -fn reads_value(loc: &AtomicUsize, val: usize) -> usize { +fn spin_until(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Relaxed) != val { std::hint::spin_loop(); } @@ -85,7 +85,7 @@ fn initialization_write(add_fence: bool) -> bool { }); let j2 = spawn(move || { - reads_value(wait, 1); + spin_until(wait, 1); if add_fence { fence(AcqRel); } @@ -119,12 +119,12 @@ fn faa_replaced_by_load() -> bool { let go = static_atomic(0); let t1 = spawn(move || { - while go.load(Relaxed) == 0 {} + spin_until(go, 1); rdmw(y, x, z) }); let t2 = spawn(move || { - while go.load(Relaxed) == 0 {} + spin_until(go, 1); rdmw(z, x, y) });