From dfd553af25c649366343bd966a79b72df861fd49 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 22 Apr 2025 07:46:09 +0000
Subject: [PATCH 001/133] fix: Switch to resolver v2

The published crates fail to build with an edition less than 2024
because they are packaged with `resolver = "3"`, which is a 2024-only
option. Revert back to resolver v2 to drop this requirement.

Fixes: https://github.com/rust-lang/compiler-builtins/issues/883
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 75bb81ec1..b39ec8a25 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-resolver = "3"
+resolver = "2"
 members = [
     "builtins-test",
     "compiler-builtins",

From 0bdef053a00a5a17722733c550606ad15d62cea6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 22 Apr 2025 08:05:23 +0000
Subject: [PATCH 002/133] chore: Release libm v0.2.13

---
 libm/CHANGELOG.md | 6 ++++++
 libm/Cargo.toml   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md
index c507608dd..292561f86 100644
--- a/libm/CHANGELOG.md
+++ b/libm/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to
 
 ## [Unreleased]
 
+## [0.2.13](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.12...libm-v0.2.13) - 2025-04-21
+
+### Fixed
+
+- Switch back to workspace resolver v2 to unbreak builds without the 2024 edition
+
 ## [0.2.12](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.11...libm-v0.2.12) - 2025-04-21
 
 - Mark generic functions `#[inline]`
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index dc553ca4a..f80715ff6 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -8,7 +8,7 @@ license = "MIT"
 name = "libm"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
-version = "0.2.12"
+version = "0.2.13"
 edition = "2021"
 rust-version = "1.63"
 

From 667ba286c7d27d124fe4b4f0d0c933212313105f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 22 Apr 2025 16:30:17 -0400
Subject: [PATCH 003/133] musl: Update submodule

Update the musl submodule to c47ad25ea3 ("iconv: harden UTF-8 output
code path against input decoder bugs").
---
 crates/musl-math-sys/musl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
index 61399d4bd..c47ad25ea 160000
--- a/crates/musl-math-sys/musl
+++ b/crates/musl-math-sys/musl
@@ -1 +1 @@
-Subproject commit 61399d4bd02ae1ec03068445aa7ffe9174466bfd
+Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da

From 672ba576cb97c4d89e45d30509c6a989ff57e4aa Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 23 Apr 2025 06:46:31 +0000
Subject: [PATCH 004/133] libm-macros: Start tracking which functions are
 public

It would be nice to reuse some of the macro structure for internal
functions, like `rem_pio2`. To facilitate this, add a `public` field and
make it available in the macro's API.
---
 crates/libm-macros/src/lib.rs     |   6 +-
 crates/libm-macros/src/shared.rs  | 383 +++++++++++++++++-------------
 crates/libm-macros/tests/basic.rs |   2 +
 libm-test/src/op.rs               |  10 +-
 4 files changed, 226 insertions(+), 175 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 3cdd364e8..144676c12 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -12,7 +12,7 @@ use syn::visit_mut::VisitMut;
 use syn::{Ident, ItemEnum};
 
 const KNOWN_TYPES: &[&str] = &[
-    "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet",
+    "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", "public",
 ];
 
 /// Populate an enum with a variant representing function. Names are in upper camel case.
@@ -80,6 +80,8 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///         RustArgs: $RustArgs:ty,
 ///         // The Rust version's return type (e.g. `(f32, f32)`)
 ///         RustRet: $RustRet:ty,
+///         // True if this is part of `libm`'s public API
+///         public: $public:expr,
 ///         // Attributes for the current function, if any
 ///         attrs: [$($attr:meta),*],
 ///         // Extra tokens passed directly (if any)
@@ -329,6 +331,7 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::T
         let c_ret = &func.c_sig.returns;
         let rust_args = &func.rust_sig.args;
         let rust_ret = &func.rust_sig.returns;
+        let public = func.public;
 
         let mut ty_fields = Vec::new();
         for ty in &input.emit_types {
@@ -340,6 +343,7 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::T
                 "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), },
                 "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), },
                 "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), },
+                "public" => quote! { public: #public, },
                 _ => unreachable!("checked in validation"),
             };
             ty_fields.push(field);
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 750ed1afb..1cefe4e8c 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -3,16 +3,26 @@
 use std::fmt;
 use std::sync::LazyLock;
 
-const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])] = &[
-    (
+struct NestedOp {
+    float_ty: FloatTy,
+    rust_sig: Signature,
+    c_sig: Option<Signature>,
+    fn_list: &'static [&'static str],
+    public: bool,
+}
+
+/// We need a flat list to work with most of the time, but define things as a more convenient
+/// nested list.
+const ALL_OPERATIONS_NESTED: &[NestedOp] = &[
+    NestedOp {
         // `fn(f16) -> f16`
-        FloatTy::F16,
-        Signature {
+        float_ty: FloatTy::F16,
+        rust_sig: Signature {
             args: &[Ty::F16],
             returns: &[Ty::F16],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "ceilf16",
             "fabsf16",
             "floorf16",
@@ -22,16 +32,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "sqrtf16",
             "truncf16",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `fn(f32) -> f32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32],
             returns: &[Ty::F32],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "acosf",
             "acoshf",
             "asinf",
@@ -70,16 +81,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "y0f",
             "y1f",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f64) -> f64`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64],
             returns: &[Ty::F64],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "acos",
             "acosh",
             "asin",
@@ -118,16 +130,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "y0",
             "y1",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `fn(f128) -> f128`
-        FloatTy::F128,
-        Signature {
+        float_ty: FloatTy::F128,
+        rust_sig: Signature {
             args: &[Ty::F128],
             returns: &[Ty::F128],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "ceilf128",
             "fabsf128",
             "floorf128",
@@ -137,16 +150,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "sqrtf128",
             "truncf128",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f16, f16) -> f16`
-        FloatTy::F16,
-        Signature {
+        float_ty: FloatTy::F16,
+        rust_sig: Signature {
             args: &[Ty::F16, Ty::F16],
             returns: &[Ty::F16],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "copysignf16",
             "fdimf16",
             "fmaxf16",
@@ -157,16 +171,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "fminimumf16",
             "fmodf16",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f32, f32) -> f32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32, Ty::F32],
             returns: &[Ty::F32],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "atan2f",
             "copysignf",
             "fdimf",
@@ -182,16 +197,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "powf",
             "remainderf",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f64, f64) -> f64`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64, Ty::F64],
             returns: &[Ty::F64],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "atan2",
             "copysign",
             "fdim",
@@ -207,16 +223,17 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "pow",
             "remainder",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f128, f128) -> f128`
-        FloatTy::F128,
-        Signature {
+        float_ty: FloatTy::F128,
+        rust_sig: Signature {
             args: &[Ty::F128, Ty::F128],
             returns: &[Ty::F128],
         },
-        None,
-        &[
+        c_sig: None,
+        fn_list: &[
             "copysignf128",
             "fdimf128",
             "fmaxf128",
@@ -227,221 +244,241 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "fminimumf128",
             "fmodf128",
         ],
-    ),
-    (
+        public: true,
+    },
+    NestedOp {
         // `(f32, f32, f32) -> f32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32, Ty::F32, Ty::F32],
             returns: &[Ty::F32],
         },
-        None,
-        &["fmaf"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["fmaf"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, f64, f64) -> f64`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64, Ty::F64, Ty::F64],
             returns: &[Ty::F64],
         },
-        None,
-        &["fma"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["fma"],
+        public: true,
+    },
+    NestedOp {
         // `(f128, f128, f128) -> f128`
-        FloatTy::F128,
-        Signature {
+        float_ty: FloatTy::F128,
+        rust_sig: Signature {
             args: &[Ty::F128, Ty::F128, Ty::F128],
             returns: &[Ty::F128],
         },
-        None,
-        &["fmaf128"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["fmaf128"],
+        public: true,
+    },
+    NestedOp {
         // `(f32) -> i32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32],
             returns: &[Ty::I32],
         },
-        None,
-        &["ilogbf"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ilogbf"],
+        public: true,
+    },
+    NestedOp {
         // `(f64) -> i32`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64],
             returns: &[Ty::I32],
         },
-        None,
-        &["ilogb"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ilogb"],
+        public: true,
+    },
+    NestedOp {
         // `(i32, f32) -> f32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::I32, Ty::F32],
             returns: &[Ty::F32],
         },
-        None,
-        &["jnf", "ynf"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["jnf", "ynf"],
+        public: true,
+    },
+    NestedOp {
         // `(i32, f64) -> f64`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::I32, Ty::F64],
             returns: &[Ty::F64],
         },
-        None,
-        &["jn", "yn"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["jn", "yn"],
+        public: true,
+    },
+    NestedOp {
         // `(f16, i32) -> f16`
-        FloatTy::F16,
-        Signature {
+        float_ty: FloatTy::F16,
+        rust_sig: Signature {
             args: &[Ty::F16, Ty::I32],
             returns: &[Ty::F16],
         },
-        None,
-        &["ldexpf16", "scalbnf16"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ldexpf16", "scalbnf16"],
+        public: true,
+    },
+    NestedOp {
         // `(f32, i32) -> f32`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32, Ty::I32],
             returns: &[Ty::F32],
         },
-        None,
-        &["ldexpf", "scalbnf"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ldexpf", "scalbnf"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, i64) -> f64`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64, Ty::I32],
             returns: &[Ty::F64],
         },
-        None,
-        &["ldexp", "scalbn"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ldexp", "scalbn"],
+        public: true,
+    },
+    NestedOp {
         // `(f128, i32) -> f128`
-        FloatTy::F128,
-        Signature {
+        float_ty: FloatTy::F128,
+        rust_sig: Signature {
             args: &[Ty::F128, Ty::I32],
             returns: &[Ty::F128],
         },
-        None,
-        &["ldexpf128", "scalbnf128"],
-    ),
-    (
+        c_sig: None,
+        fn_list: &["ldexpf128", "scalbnf128"],
+        public: true,
+    },
+    NestedOp {
         // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32],
             returns: &[Ty::F32, Ty::F32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F32, Ty::MutF32],
             returns: &[Ty::F32],
         }),
-        &["modff"],
-    ),
-    (
+        fn_list: &["modff"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64],
             returns: &[Ty::F64, Ty::F64],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F64, Ty::MutF64],
             returns: &[Ty::F64],
         }),
-        &["modf"],
-    ),
-    (
+        fn_list: &["modf"],
+        public: true,
+    },
+    NestedOp {
         // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32],
             returns: &[Ty::F32, Ty::I32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F32, Ty::MutCInt],
             returns: &[Ty::F32],
         }),
-        &["frexpf", "lgammaf_r"],
-    ),
-    (
+        fn_list: &["frexpf", "lgammaf_r"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64],
             returns: &[Ty::F64, Ty::I32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F64, Ty::MutCInt],
             returns: &[Ty::F64],
         }),
-        &["frexp", "lgamma_r"],
-    ),
-    (
+        fn_list: &["frexp", "lgamma_r"],
+        public: true,
+    },
+    NestedOp {
         // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32, Ty::F32],
             returns: &[Ty::F32, Ty::I32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F32, Ty::F32, Ty::MutCInt],
             returns: &[Ty::F32],
         }),
-        &["remquof"],
-    ),
-    (
+        fn_list: &["remquof"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64, Ty::F64],
             returns: &[Ty::F64, Ty::I32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F64, Ty::F64, Ty::MutCInt],
             returns: &[Ty::F64],
         }),
-        &["remquo"],
-    ),
-    (
+        fn_list: &["remquo"],
+        public: true,
+    },
+    NestedOp {
         // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
-        FloatTy::F32,
-        Signature {
+        float_ty: FloatTy::F32,
+        rust_sig: Signature {
             args: &[Ty::F32],
             returns: &[Ty::F32, Ty::F32],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F32, Ty::MutF32, Ty::MutF32],
             returns: &[],
         }),
-        &["sincosf"],
-    ),
-    (
+        fn_list: &["sincosf"],
+        public: true,
+    },
+    NestedOp {
         // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
-        FloatTy::F64,
-        Signature {
+        float_ty: FloatTy::F64,
+        rust_sig: Signature {
             args: &[Ty::F64],
             returns: &[Ty::F64, Ty::F64],
         },
-        Some(Signature {
+        c_sig: Some(Signature {
             args: &[Ty::F64, Ty::MutF64, Ty::MutF64],
             returns: &[],
         }),
-        &["sincos"],
-    ),
+        fn_list: &["sincos"],
+        public: true,
+    },
 ];
 
 /// A type used in a function signature.
@@ -520,27 +557,31 @@ pub struct MathOpInfo {
     pub c_sig: Signature,
     /// Function signature for Rust implementations
     pub rust_sig: Signature,
+    /// True if part of libm's public API
+    pub public: bool,
 }
 
 /// A flat representation of `ALL_FUNCTIONS`.
 pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
     let mut ret = Vec::new();
 
-    for (base_fty, rust_sig, c_sig, names) in ALL_OPERATIONS_NESTED {
-        for name in *names {
+    for op in ALL_OPERATIONS_NESTED {
+        let fn_names = op.fn_list;
+        for name in fn_names {
             let api = MathOpInfo {
                 name,
-                float_ty: *base_fty,
-                rust_sig: rust_sig.clone(),
-                c_sig: c_sig.clone().unwrap_or_else(|| rust_sig.clone()),
+                float_ty: op.float_ty,
+                rust_sig: op.rust_sig.clone(),
+                c_sig: op.c_sig.clone().unwrap_or_else(|| op.rust_sig.clone()),
+                public: op.public,
             };
             ret.push(api);
         }
 
-        if !names.is_sorted() {
-            let mut sorted = (*names).to_owned();
+        if !fn_names.is_sorted() {
+            let mut sorted = (*fn_names).to_owned();
             sorted.sort_unstable();
-            panic!("names list is not sorted: {names:?}\nExpected: {sorted:?}");
+            panic!("names list is not sorted: {fn_names:?}\nExpected: {sorted:?}");
         }
     }
 
diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 5314e84bb..260350ef2 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -13,6 +13,7 @@ macro_rules! basic {
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
+        public: $public:expr,
         attrs: [$($attr:meta),*],
         extra: [$($extra_tt:tt)*],
         fn_extra: $fn_extra:expr,
@@ -25,6 +26,7 @@ macro_rules! basic {
             type RustFnTy = $RustFn;
             type RustArgsTy = $RustArgs;
             type RustRetTy = $RustRet;
+            const PUBLIC: bool = $public;
             const A: &[&str] = &[$($extra_tt)*];
             fn foo(a: f32) -> f32 {
                 $fn_extra(a)
diff --git a/libm-test/src/op.rs b/libm-test/src/op.rs
index bd17aad7d..afd445ff9 100644
--- a/libm-test/src/op.rs
+++ b/libm-test/src/op.rs
@@ -90,6 +90,9 @@ pub trait MathOp {
 
     /// The function in `libm` which can be called.
     const ROUTINE: Self::RustFn;
+
+    /// Whether or not the function is part of libm's public API.
+    const PUBLIC: bool;
 }
 
 /// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types).
@@ -107,7 +110,7 @@ pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
 /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
 
-macro_rules! do_thing {
+macro_rules! create_op_modules {
     // Matcher for unary functions
     (
         fn_name: $fn_name:ident,
@@ -118,8 +121,8 @@ macro_rules! do_thing {
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
+        public: $public:expr,
         attrs: [$($attr:meta),*],
-
     ) => {
         paste::paste! {
             $(#[$attr])*
@@ -138,6 +141,7 @@ macro_rules! do_thing {
 
                     const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >];
                     const ROUTINE: Self::RustFn = libm::$fn_name;
+                    const PUBLIC: bool = $public;
                 }
             }
 
@@ -146,6 +150,6 @@ macro_rules! do_thing {
 }
 
 libm_macros::for_each_function! {
-    callback: do_thing,
+    callback: create_op_modules,
     emit_types: all,
 }

From bf792806487d314c52660edeb2a0557eeadc728a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 23 Apr 2025 07:47:48 +0000
Subject: [PATCH 005/133] libm-macros: Allow a way to bulk match f16 and f128
 functions

These are never available in musl, so introduce easier ways to skip them
rather than needing to exclude f16/f128 functions in three different
places.
---
 crates/libm-macros/src/lib.rs         | 30 +++++++++++
 crates/libm-macros/src/parse.rs       | 16 +++++-
 crates/libm-macros/tests/basic.rs     | 72 +++++++++++++++++++++++++++
 crates/util/src/main.rs               | 44 ++--------------
 libm-test/benches/random.rs           | 45 ++---------------
 libm-test/tests/compare_built_musl.rs | 43 ++--------------
 6 files changed, 128 insertions(+), 122 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 144676c12..e8afe3aad 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -116,6 +116,9 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///     // a simplified match-like syntax.
 ///     fn_extra: match MACRO_FN_NAME {
 ///         hypot | hypotf => |x| x.hypot(),
+///         // `ALL_*` magic matchers also work to extract specific types
+///         ALL_F64 => |x| x,
+///         // The default pattern gets applied to everything that did not match
 ///         _ => |x| x,
 ///     },
 /// }
@@ -138,6 +141,27 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
 ///
 /// Returns the list of function names that we should expand for.
 fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>> {
+    // Replace magic `ALL_*` matchers with an entry for each relevant function.
+    if let Some(map) = &mut input.fn_extra {
+        for (name, ty) in [
+            ("ALL_F16", FloatTy::F16),
+            ("ALL_F32", FloatTy::F32),
+            ("ALL_F64", FloatTy::F64),
+            ("ALL_F128", FloatTy::F128),
+        ] {
+            let Some(k) = map.keys().find(|key| *key == name) else {
+                continue;
+            };
+
+            let key = k.clone();
+            let val = map.remove(&key).unwrap();
+
+            for op in ALL_OPERATIONS.iter().filter(|op| op.float_ty == ty) {
+                map.insert(Ident::new(op.name, key.span()), val.clone());
+            }
+        }
+    }
+
     // Collect lists of all functions that are provied as macro inputs in various fields (only,
     // skip, attributes).
     let attr_mentions = input
@@ -195,6 +219,12 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>
             continue;
         }
 
+        // Omit f16 and f128 functions if requested
+        if input.skip_f16_f128 && (func.float_ty == FloatTy::F16 || func.float_ty == FloatTy::F128)
+        {
+            continue;
+        }
+
         // Run everything else
         fn_list.push(func);
     }
diff --git a/crates/libm-macros/src/parse.rs b/crates/libm-macros/src/parse.rs
index d60d1247a..4876f3ef7 100644
--- a/crates/libm-macros/src/parse.rs
+++ b/crates/libm-macros/src/parse.rs
@@ -6,7 +6,7 @@ use syn::parse::{Parse, ParseStream, Parser};
 use syn::punctuated::Punctuated;
 use syn::spanned::Spanned;
 use syn::token::{self, Comma};
-use syn::{Arm, Attribute, Expr, ExprMatch, Ident, Meta, Token, bracketed};
+use syn::{Arm, Attribute, Expr, ExprMatch, Ident, LitBool, Meta, Token, bracketed};
 
 /// The input to our macro; just a list of `field: value` items.
 #[derive(Debug)]
@@ -50,6 +50,8 @@ pub struct StructuredInput {
     pub emit_types: Vec<Ident>,
     /// Skip these functions
     pub skip: Vec<Ident>,
+    /// If true, omit f16 and f128 functions that aren't present in other libraries.
+    pub skip_f16_f128: bool,
     /// Invoke only for these functions
     pub only: Option<Vec<Ident>>,
     /// Attributes that get applied to specific functions
@@ -70,6 +72,7 @@ impl StructuredInput {
         let cb_expr = expect_field(&mut map, "callback")?;
         let emit_types_expr = expect_field(&mut map, "emit_types").ok();
         let skip_expr = expect_field(&mut map, "skip").ok();
+        let skip_f16_f128 = expect_field(&mut map, "skip_f16_f128").ok();
         let only_expr = expect_field(&mut map, "only").ok();
         let attr_expr = expect_field(&mut map, "attributes").ok();
         let extra = expect_field(&mut map, "extra").ok();
@@ -93,6 +96,11 @@ impl StructuredInput {
             None => Vec::new(),
         };
 
+        let skip_f16_f128 = match skip_f16_f128 {
+            Some(expr) => expect_litbool(expr)?.value,
+            None => false,
+        };
+
         let only_span = only_expr.as_ref().map(|expr| expr.span());
         let only = match only_expr {
             Some(expr) => Some(Parser::parse2(parse_ident_array, expr.into_token_stream())?),
@@ -122,6 +130,7 @@ impl StructuredInput {
             callback: expect_ident(cb_expr)?,
             emit_types,
             skip,
+            skip_f16_f128,
             only,
             only_span,
             attributes,
@@ -220,6 +229,11 @@ fn expect_ident(expr: Expr) -> syn::Result<Ident> {
     syn::parse2(expr.into_token_stream())
 }
 
+/// Coerce an expression into a boolean literal (`true` or `false`).
+fn expect_litbool(expr: Expr) -> syn::Result<LitBool> {
+    syn::parse2(expr.into_token_stream())
+}
+
 /// Parse either a single identifier (`foo`) or an array of identifiers (`[foo, bar, baz]`).
 fn parse_ident_or_array(input: ParseStream) -> syn::Result<Vec<Ident>> {
     if !input.peek(token::Bracket) {
diff --git a/crates/libm-macros/tests/basic.rs b/crates/libm-macros/tests/basic.rs
index 260350ef2..b42762622 100644
--- a/crates/libm-macros/tests/basic.rs
+++ b/crates/libm-macros/tests/basic.rs
@@ -103,3 +103,75 @@ mod test_emit_types {
         emit_types: [RustFn, RustArgs],
     }
 }
+
+#[test]
+fn test_skip_f16_f128() {
+    macro_rules! skip_f16_f128 {
+        (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        extra: $vec:ident,
+    ) => {
+            $vec.push(stringify!($fn_name));
+        };
+    }
+
+    let mut v = Vec::new();
+    // Collect the name of every function emitted with `skip_f16_f128` enabled
+    libm_macros::for_each_function! {
+        callback: skip_f16_f128,
+        skip_f16_f128: true,
+        extra: v,
+    }
+
+    for name in v {
+        assert!(!name.contains("f16"), "{name}");
+        assert!(!name.contains("f128"), "{name}");
+    }
+}
+
+#[test]
+fn test_fn_extra_expansion() {
+    macro_rules! fn_extra_expansion {
+        (
+            fn_name: $fn_name:ident,
+            attrs: [$($attr:meta),*],
+            fn_extra: $vec:expr,
+        ) => {
+            $vec.push(stringify!($fn_name));
+        };
+    }
+
+    let mut vf16 = Vec::new();
+    let mut vf32 = Vec::new();
+    let mut vf64 = Vec::new();
+    let mut vf128 = Vec::new();
+
+    // Test with no extra, no skip, and no attributes
+    libm_macros::for_each_function! {
+        callback: fn_extra_expansion,
+        fn_extra: match MACRO_FN_NAME {
+            ALL_F16 => vf16,
+            ALL_F32 => vf32,
+            ALL_F64 => vf64,
+            ALL_F128 => vf128,
+        }
+    }
+
+    // Skip functions with a suffix after the type spec
+    vf16.retain(|name| !name.ends_with("_r"));
+    vf32.retain(|name| !name.ends_with("_r"));
+    vf64.retain(|name| !name.ends_with("_r"));
+    vf128.retain(|name| !name.ends_with("_r"));
+
+    for name in vf16 {
+        assert!(name.ends_with("f16"), "{name}");
+    }
+    for name in vf32 {
+        assert!(name.ends_with("f"), "{name}");
+    }
+    let _ = vf64;
+    for name in vf128 {
+        assert!(name.ends_with("f128"), "{name}");
+    }
+}
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index e70578699..597218153 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -86,55 +86,19 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
         emit_types: [CFn, RustFn, RustArgs],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
-            ceilf128
-            | ceilf16
-            | copysignf128
-            | copysignf16
-            | fabsf128
-            | fabsf16
-            | fdimf128
-            | fdimf16
-            | floorf128
-            | floorf16
-            | fmaf128
-            | fmaxf128
-            | fmaxf16
-            | fmaximum
+            // Not provided by musl
+            fmaximum
             | fmaximum_num
             | fmaximum_numf
-            | fmaximum_numf128
-            | fmaximum_numf16
             | fmaximumf
-            | fmaximumf128
-            | fmaximumf16
-            | fminf128
-            | fminf16
             | fminimum
             | fminimum_num
             | fminimum_numf
-            | fminimum_numf128
-            | fminimum_numf16
             | fminimumf
-            | fminimumf128
-            | fminimumf16
-            | fmodf128
-            | fmodf16
-            | ldexpf128
-            | ldexpf16
-            | rintf128
-            | rintf16
             | roundeven
             | roundevenf
-            | roundevenf128
-            | roundevenf16
-            | roundf128
-            | roundf16
-            | scalbnf128
-            | scalbnf16
-            | sqrtf128
-            | sqrtf16
-            | truncf128
-            | truncf16  => None,
+            | ALL_F16
+            | ALL_F128 => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
         }
     }
diff --git a/libm-test/benches/random.rs b/libm-test/benches/random.rs
index 81f58e3a6..1b17f049e 100644
--- a/libm-test/benches/random.rs
+++ b/libm-test/benches/random.rs
@@ -125,56 +125,19 @@ libm_macros::for_each_function! {
         // FIXME(correctness): exp functions have the wrong result on i586
         exp10 | exp10f | exp2 | exp2f => (true, Some(musl_math_sys::MACRO_FN_NAME)),
 
-        // Musl does not provide `f16` and `f128` functions
-        ceilf128
-        | ceilf16
-        | copysignf128
-        | copysignf16
-        | fabsf128
-        | fabsf16
-        | fdimf128
-        | fdimf16
-        | floorf128
-        | floorf16
-        | fmaf128
-        | fmaxf128
-        | fmaxf16
-        | fmaximum
+        // Musl does not provide `f16` and `f128` functions, as well as a handful of others
+        fmaximum
         | fmaximum_num
         | fmaximum_numf
-        | fmaximum_numf128
-        | fmaximum_numf16
         | fmaximumf
-        | fmaximumf128
-        | fmaximumf16
-        | fminf128
-        | fminf16
         | fminimum
         | fminimum_num
         | fminimum_numf
-        | fminimum_numf128
-        | fminimum_numf16
         | fminimumf
-        | fminimumf128
-        | fminimumf16
-        | fmodf128
-        | fmodf16
-        | ldexpf128
-        | ldexpf16
-        | rintf128
-        | rintf16
         | roundeven
         | roundevenf
-        | roundevenf128
-        | roundevenf16
-        | roundf128
-        | roundf16
-        | scalbnf128
-        | scalbnf16
-        | sqrtf128
-        | sqrtf16
-        | truncf128
-        | truncf16 => (false, None),
+        | ALL_F16
+        | ALL_F128 => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs
index cbb4bd49b..6ccbb6f4c 100644
--- a/libm-test/tests/compare_built_musl.rs
+++ b/libm-test/tests/compare_built_musl.rs
@@ -76,6 +76,8 @@ macro_rules! musl_tests {
 libm_macros::for_each_function! {
     callback: musl_tests,
     attributes: [],
+    // Not provided by musl
+    skip_f16_f128: true,
     skip: [
         // TODO integer inputs
         jn,
@@ -89,55 +91,16 @@ libm_macros::for_each_function! {
 
         // Not provided by musl
         // verify-sorted-start
-        ceilf128,
-        ceilf16,
-        copysignf128,
-        copysignf16,
-        fabsf128,
-        fabsf16,
-        fdimf128,
-        fdimf16,
-        floorf128,
-        floorf16,
-        fmaf128,
-        fmaxf128,
-        fmaxf16,
         fmaximum,
         fmaximum_num,
         fmaximum_numf,
-        fmaximum_numf128,
-        fmaximum_numf16,
         fmaximumf,
-        fmaximumf128,
-        fmaximumf16,
-        fminf128,
-        fminf16,
         fminimum,
         fminimum_num,
         fminimum_numf,
-        fminimum_numf128,
-        fminimum_numf16,
         fminimumf,
-        fminimumf128,
-        fminimumf16,
-        fmodf128,
-        fmodf16,
-        ldexpf128,
-        ldexpf16,
-        rintf128,
-        rintf16,
         roundeven,
         roundevenf,
-        roundevenf128,
-        roundevenf16,
-        roundf128,
-        roundf16,
-        scalbnf128,
-        scalbnf16,
-        sqrtf128,
-        sqrtf16,
-        truncf128,
-        truncf16,
-        // verify-sorted-end
+        // // verify-sorted-end
     ],
 }

From fdbefb39d5bb0b95b29b821247044c8aaf436160 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 19 Mar 2025 00:19:04 +0000
Subject: [PATCH 006/133] Warn on `unsafe_op_in_unsafe_fn` by default

Edition 2024 requires that we avoid this. There is a lot of code that
will need to be adjusted, so start the process here with a warning that
will show up in CI.
---
 builtins-test-intrinsics/src/main.rs |  20 +--
 compiler-builtins/src/arm.rs         | 181 ++++++++++++++++++++++-----
 compiler-builtins/src/lib.rs         |   3 +
 compiler-builtins/src/macros.rs      |  15 +--
 compiler-builtins/src/mem/mod.rs     |   2 +
 compiler-builtins/src/x86_64.rs      |   2 +-
 6 files changed, 172 insertions(+), 51 deletions(-)

diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index c4c026368..18f943eff 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -649,14 +649,14 @@ fn something_with_a_dtor(f: &dyn Fn()) {
     f();
 }
 
-#[no_mangle]
+#[unsafe(no_mangle)]
 #[cfg(not(thumb))]
 fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int {
     run();
     0
 }
 
-#[no_mangle]
+#[unsafe(no_mangle)]
 #[cfg(thumb)]
 pub fn _start() -> ! {
     run();
@@ -669,30 +669,30 @@ pub fn _start() -> ! {
 extern "C" {}
 
 // ARM targets need these symbols
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub fn __aeabi_unwind_cpp_pr0() {}
 
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub fn __aeabi_unwind_cpp_pr1() {}
 
 #[cfg(not(any(windows, target_os = "cygwin")))]
 #[allow(non_snake_case)]
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub fn _Unwind_Resume() {}
 
 #[cfg(not(any(windows, target_os = "cygwin")))]
 #[lang = "eh_personality"]
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub extern "C" fn eh_personality() {}
 
 #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))]
 mod mingw_unwinding {
-    #[no_mangle]
+    #[unsafe(no_mangle)]
     pub fn rust_eh_personality() {}
-    #[no_mangle]
+    #[unsafe(no_mangle)]
     pub fn rust_eh_unwind_resume() {}
-    #[no_mangle]
+    #[unsafe(no_mangle)]
     pub fn rust_eh_register_frames() {}
-    #[no_mangle]
+    #[unsafe(no_mangle)]
     pub fn rust_eh_unregister_frames() {}
 }
diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index 7859b5120..a9107e3cd 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -76,90 +76,205 @@ intrinsics! {
         );
     }
 
-    // FIXME: The `*4` and `*8` variants should be defined as aliases.
+    // FIXME(arm): The `*4` and `*8` variants should be defined as aliases.
 
+    /// `memcpy` provided with the `aapcs` ABI.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memcpy` requirements apply.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
-        crate::mem::memcpy(dest, src, n);
+    pub unsafe extern "aapcs" fn __aeabi_memcpy(dst: *mut u8, src: *const u8, n: usize) {
+        // SAFETY: memcpy preconditions apply.
+        unsafe { crate::mem::memcpy(dst, src, n) };
     }
 
+    /// `memcpy` for 4-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memcpy` requirements apply. Additionally, `dst` and `src` must be aligned to
+    /// four bytes.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
+    pub unsafe extern "aapcs" fn __aeabi_memcpy4(dst: *mut u8, src: *const u8, n: usize) {
         // We are guaranteed 4-alignment, so accessing at u32 is okay.
-        let mut dest = dest as *mut u32;
-        let mut src = src as *mut u32;
+        let mut dst = dst.cast::<u32>();
+        let mut src = src.cast::<u32>();
+        debug_assert!(dst.is_aligned());
+        debug_assert!(src.is_aligned());
         let mut n = n;
 
         while n >= 4 {
-            *dest = *src;
-            dest = dest.offset(1);
-            src = src.offset(1);
+            // SAFETY: `dst` and `src` are both valid for at least 4 bytes, from
+            // `memcpy` preconditions and the loop guard.
+            unsafe { *dst = *src };
+
+            // FIXME(addr): if we can make this end-of-address-space safe without losing
+            // performance, we may want to consider that.
+            // SAFETY: memcpy is not expected to work at the end of the address space
+            unsafe {
+                dst = dst.offset(1);
+                src = src.offset(1);
+            }
+
             n -= 4;
         }
 
-        __aeabi_memcpy(dest as *mut u8, src as *const u8, n);
+        // SAFETY: `dst` and `src` will still be valid for `n` bytes
+        unsafe { __aeabi_memcpy(dst.cast::<u8>(), src.cast::<u8>(), n) };
     }
 
+    /// `memcpy` for 8-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memcpy` requirements apply. Additionally, `dst` and `src` must be aligned to
+    /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
-        __aeabi_memcpy4(dest, src, n);
+    pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(src.addr() & 7 == 0);
+
+        // SAFETY: memcpy preconditions apply, less strict alignment.
+        unsafe { __aeabi_memcpy4(dst, src, n) };
     }
 
+    /// `memmove` provided with the `aapcs` ABI.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memmove` requirements apply.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
-        crate::mem::memmove(dest, src, n);
+    pub unsafe extern "aapcs" fn __aeabi_memmove(dst: *mut u8, src: *const u8, n: usize) {
+        // SAFETY: memmove preconditions apply.
+        unsafe { crate::mem::memmove(dst, src, n) };
     }
 
+    /// `memmove` for 4-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to
+    /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
-    pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
-        __aeabi_memmove(dest, src, n);
+    pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(dst.addr() & 3 == 0);
+        debug_assert!(src.addr() & 3 == 0);
+
+        // SAFETY: same preconditions, less strict alignment.
+        unsafe { __aeabi_memmove(dst, src, n) };
     }
 
+    /// `memmove` for 8-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memmove` requirements apply. Additionally, `dst` and `src` must be aligned to
+    /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
-    pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
-        __aeabi_memmove(dest, src, n);
+    pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(src.addr() & 7 == 0);
+
+        // SAFETY: memmove preconditions apply, less strict alignment.
+        unsafe { __aeabi_memmove(dst, src, n) };
     }
 
+    /// `memset` provided with the `aapcs` ABI.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memset` requirements apply.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
+    pub unsafe extern "aapcs" fn __aeabi_memset(dst: *mut u8, n: usize, c: i32) {
         // Note the different argument order
-        crate::mem::memset(dest, c, n);
+        // SAFETY: memset preconditions apply.
+        unsafe { crate::mem::memset(dst, c, n) };
     }
 
+    /// `memset` for 4-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memset` requirements apply. Additionally, `dst` must be aligned to
+    /// four bytes.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
-        let mut dest = dest as *mut u32;
+    pub unsafe extern "aapcs" fn __aeabi_memset4(dst: *mut u8, n: usize, c: i32) {
+        let mut dst = dst.cast::<u32>();
+        debug_assert!(dst.is_aligned());
         let mut n = n;
 
         let byte = (c as u32) & 0xff;
         let c = (byte << 24) | (byte << 16) | (byte << 8) | byte;
 
         while n >= 4 {
-            *dest = c;
-            dest = dest.offset(1);
+            // SAFETY: `dst` is valid for at least 4 bytes, from `memset` preconditions and
+            // the loop guard.
+            unsafe { *dst = c };
+
+            // FIXME(addr): if we can make this end-of-address-space safe without losing
+            // performance, we may want to consider that.
+            // SAFETY: memset is not expected to work at the end of the address space
+            unsafe {
+                dst = dst.offset(1);
+            }
             n -= 4;
         }
 
-        __aeabi_memset(dest as *mut u8, n, byte as i32);
+        // SAFETY: `dst` will still be valid for `n` bytes
+        unsafe { __aeabi_memset(dst.cast::<u8>(), n, byte as i32) };
     }
 
+    /// `memset` for 8-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memset` requirements apply. Additionally, `dst` must be aligned to
+    /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
-        __aeabi_memset4(dest, n, c);
+    pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) {
+        debug_assert!(dst.addr() & 7 == 0);
+
+        // SAFETY: memset preconditions apply, less strict alignment.
+        unsafe { __aeabi_memset4(dst, n, c) };
     }
 
+    /// `memclr` provided with the `aapcs` ABI.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memclr` requirements apply.
     #[cfg(not(target_vendor = "apple"))]
-    pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
-        __aeabi_memset(dest, n, 0);
+    pub unsafe extern "aapcs" fn __aeabi_memclr(dst: *mut u8, n: usize) {
+        // SAFETY: memclr preconditions apply.
+        unsafe { __aeabi_memset(dst, n, 0) };
     }
 
+    /// `memclr` for 4-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memclr` requirements apply. Additionally, `dst` must be aligned to
+    /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
-    pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
-        __aeabi_memset4(dest, n, 0);
+    pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) {
+        debug_assert!(dst.addr() & 3 == 0);
+
+        // SAFETY: memclr preconditions apply, same 4-byte alignment.
+        unsafe { __aeabi_memset4(dst, n, 0) };
     }
 
+    /// `memclr` for 8-byte alignment.
+    ///
+    /// # Safety
+    ///
+    /// Usual `memclr` requirements apply. Additionally, `dst` must be aligned to
+    /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
-    pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
-        __aeabi_memset4(dest, n, 0);
+    pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) {
+        debug_assert!(dst.addr() & 7 == 0);
+
+        // SAFETY: memclr preconditions apply, less strict alignment.
+        unsafe { __aeabi_memset4(dst, n, 0) };
     }
 }
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index 7523a00cf..6a6b28067 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -22,6 +22,9 @@
 #![allow(clippy::manual_swap)]
 // Support compiling on both stage0 and stage1 which may differ in supported stable features.
 #![allow(stable_features)]
+// By default, disallow this as it is forbidden in edition 2024. There is a lot of unsafe code to
+// be migrated, however, so exceptions exist.
+#![warn(unsafe_op_in_unsafe_fn)]
 
 // We disable #[no_mangle] for tests so that we can verify the test results
 // against the native compiler-rt implementations of the builtins.
diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs
index 4fa53656e..dbf715534 100644
--- a/compiler-builtins/src/macros.rs
+++ b/compiler-builtins/src/macros.rs
@@ -256,7 +256,7 @@ macro_rules! intrinsics {
 
         #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))]
         mod $name {
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             $(#[$($attr)*])*
             extern $abi fn $name( $($argname: u16),* ) $(-> $ret)? {
@@ -292,7 +292,7 @@ macro_rules! intrinsics {
 
         #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))]
         mod $name {
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             $(#[$($attr)*])*
             extern $abi fn $name( $($argname: $ty),* ) -> u16 {
@@ -333,7 +333,7 @@ macro_rules! intrinsics {
 
         #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))]
         mod $name {
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             $(#[$($attr)*])*
             extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
@@ -343,7 +343,7 @@ macro_rules! intrinsics {
 
         #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))]
         mod $alias {
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             $(#[$($attr)*])*
             extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? {
@@ -410,7 +410,7 @@ macro_rules! intrinsics {
         #[cfg(all(feature = "mem", not(feature = "mangled-names")))]
         mod $name {
             $(#[$($attr)*])*
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
                 super::$name($($argname),*)
@@ -485,10 +485,11 @@ macro_rules! intrinsics {
         #[cfg(not(feature = "mangled-names"))]
         mod $name {
             $(#[$($attr)*])*
-            #[no_mangle]
+            #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             $(unsafe $($empty)?)? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
-                super::$name($($argname),*)
+                // SAFETY: same preconditions.
+                $(unsafe $($empty)?)? { super::$name($($argname),*) }
             }
         }
 
diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs
index ec160039d..6828f3804 100644
--- a/compiler-builtins/src/mem/mod.rs
+++ b/compiler-builtins/src/mem/mod.rs
@@ -1,5 +1,7 @@
 // Trying to satisfy clippy here is hopeless
 #![allow(clippy::style)]
+// FIXME(e2024): this eventually needs to be removed.
+#![allow(unsafe_op_in_unsafe_fn)]
 
 #[allow(warnings)]
 #[cfg(target_pointer_width = "16")]
diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs
index af67e66e2..fc1190f79 100644
--- a/compiler-builtins/src/x86_64.rs
+++ b/compiler-builtins/src/x86_64.rs
@@ -44,7 +44,7 @@ intrinsics! {
 // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM
 // support unless we emit the _fltused
 mod _fltused {
-    #[no_mangle]
+    #[unsafe(no_mangle)]
     #[used]
     #[cfg(target_os = "uefi")]
     static _fltused: i32 = 0;

From 99b4c195918149b632a061cf1ef56c313b13b396 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Apr 2025 21:04:30 +0000
Subject: [PATCH 007/133] Resolve `unnecessary_transmutes` lints

These appeared in a later nightly. In compiler-builtins we can apply the
suggestion, but in `libm` we need to ignore them since `fx::from_bits`
is not `const` at the MSRV.

`clippy::uninlined_format_args` also seems to have gotten stricter, so
fix those here.
---
 builtins-test/tests/misc.rs           | 30 +++++++++++----------------
 builtins-test/tests/mul.rs            | 22 +++++++-------------
 compiler-builtins/build.rs            |  9 +++-----
 compiler-builtins/src/mem/impls.rs    |  2 +-
 libm/src/math/pow.rs                  | 11 ++--------
 libm/src/math/support/float_traits.rs |  6 ++++++
 6 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/builtins-test/tests/misc.rs b/builtins-test/tests/misc.rs
index b8c75c026..64a9d56f3 100644
--- a/builtins-test/tests/misc.rs
+++ b/builtins-test/tests/misc.rs
@@ -77,16 +77,13 @@ fn leading_zeros() {
             let lz1 = leading_zeros_default(x);
             let lz2 = leading_zeros_riscv(x);
             if lz0 != lz {
-                panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
+                panic!("__clzsi2({x}): std: {lz}, builtins: {lz0}");
             }
             if lz1 != lz {
-                panic!(
-                    "leading_zeros_default({}): std: {}, builtins: {}",
-                    x, lz, lz1
-                );
+                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
             }
             if lz2 != lz {
-                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
             }
         });
     }
@@ -102,16 +99,13 @@ fn leading_zeros() {
             let lz1 = leading_zeros_default(x);
             let lz2 = leading_zeros_riscv(x);
             if lz0 != lz {
-                panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0);
+                panic!("__clzdi2({x}): std: {lz}, builtins: {lz0}");
             }
             if lz1 != lz {
-                panic!(
-                    "leading_zeros_default({}): std: {}, builtins: {}",
-                    x, lz, lz1
-                );
+                panic!("leading_zeros_default({x}): std: {lz}, builtins: {lz1}");
             }
             if lz2 != lz {
-                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+                panic!("leading_zeros_riscv({x}): std: {lz}, builtins: {lz2}");
             }
         });
     }
@@ -125,7 +119,7 @@ fn leading_zeros() {
             let lz = x.leading_zeros() as usize;
             let lz0 = __clzti2(x);
             if lz0 != lz {
-                panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0);
+                panic!("__clzti2({x}): std: {lz}, builtins: {lz0}");
             }
         });
     }
@@ -142,10 +136,10 @@ fn trailing_zeros() {
         let tz0 = __ctzsi2(x);
         let tz1 = trailing_zeros(x);
         if tz0 != tz {
-            panic!("__ctzsi2({}): std: {}, builtins: {}", x, tz, tz0);
+            panic!("__ctzsi2({x}): std: {tz}, builtins: {tz0}");
         }
         if tz1 != tz {
-            panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1);
+            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
         }
     });
     fuzz(N, |x: u64| {
@@ -156,10 +150,10 @@ fn trailing_zeros() {
         let tz0 = __ctzdi2(x);
         let tz1 = trailing_zeros(x);
         if tz0 != tz {
-            panic!("__ctzdi2({}): std: {}, builtins: {}", x, tz, tz0);
+            panic!("__ctzdi2({x}): std: {tz}, builtins: {tz0}");
         }
         if tz1 != tz {
-            panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1);
+            panic!("trailing_zeros({x}): std: {tz}, builtins: {tz1}");
         }
     });
     fuzz(N, |x: u128| {
@@ -169,7 +163,7 @@ fn trailing_zeros() {
         let tz = x.trailing_zeros() as usize;
         let tz0 = __ctzti2(x);
         if tz0 != tz {
-            panic!("__ctzti2({}): std: {}, builtins: {}", x, tz, tz0);
+            panic!("__ctzti2({x}): std: {tz}, builtins: {tz0}");
         }
     });
 }
diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs
index 198cacb34..58bc9ab4a 100644
--- a/builtins-test/tests/mul.rs
+++ b/builtins-test/tests/mul.rs
@@ -18,8 +18,8 @@ mod int_mul {
                         let mul1: $i = $fn(x, y);
                         if mul0 != mul1 {
                             panic!(
-                                "{}({}, {}): std: {}, builtins: {}",
-                                stringify!($fn), x, y, mul0, mul1
+                                "{func}({x}, {y}): std: {mul0}, builtins: {mul1}",
+                                func = stringify!($fn),
                             );
                         }
                     });
@@ -52,8 +52,8 @@ mod int_overflowing_mul {
                         let o1 = o1 != 0;
                         if mul0 != mul1 || o0 != o1 {
                             panic!(
-                                "{}({}, {}): std: ({}, {}), builtins: ({}, {})",
-                                stringify!($fn), x, y, mul0, o0, mul1, o1
+                                "{func}({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",
+                                func = stringify!($fn),
                             );
                         }
                     });
@@ -77,20 +77,14 @@ mod int_overflowing_mul {
             let (mul0, o0) = x.overflowing_mul(y);
             let mul1 = __rust_u128_mulo(x, y, &mut o1);
             if mul0 != mul1 || i32::from(o0) != o1 {
-                panic!(
-                    "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})",
-                    x, y, mul0, o0, mul1, o1
-                );
+                panic!("__rust_u128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
             }
             let x = x as i128;
             let y = y as i128;
             let (mul0, o0) = x.overflowing_mul(y);
             let mul1 = __rust_i128_mulo(x, y, &mut o1);
             if mul0 != mul1 || i32::from(o0) != o1 {
-                panic!(
-                    "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})",
-                    x, y, mul0, o0, mul1, o1
-                );
+                panic!("__rust_i128_mulo({x}, {y}): std: ({mul0}, {o0}), builtins: ({mul1}, {o1})",);
             }
         });
     }
@@ -109,8 +103,8 @@ macro_rules! float_mul {
                     let mul1: $f = $fn(x, y);
                     if !Float::eq_repr(mul0, mul1) {
                         panic!(
-                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
-                            stringify!($fn), x, y, mul0, mul1
+                            "{func}({x:?}, {y:?}): std: {mul0:?}, builtins: {mul1:?}",
+                            func = stringify!($fn),
                         );
                     }
                 });
diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index 04369a4aa..90d98ec7c 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -137,7 +137,7 @@ fn aarch64_symbol(ordering: Ordering) -> &'static str {
         Ordering::Acquire => "acq",
         Ordering::Release => "rel",
         Ordering::AcqRel => "acq_rel",
-        _ => panic!("unknown symbol for {:?}", ordering),
+        _ => panic!("unknown symbol for {ordering:?}"),
     }
 }
 
@@ -229,7 +229,7 @@ fn configure_check_cfg() {
 
         for op_size in op_sizes {
             for ordering in ["relax", "acq", "rel", "acq_rel"] {
-                aarch_atomic.push(format!("__aarch64_{}{}_{}", aarch_op, op_size, ordering));
+                aarch_atomic.push(format!("__aarch64_{aarch_op}{op_size}_{ordering}"));
             }
         }
     }
@@ -239,10 +239,7 @@ fn configure_check_cfg() {
         .copied()
         .chain(aarch_atomic.iter().map(|s| s.as_str()))
     {
-        println!(
-            "cargo::rustc-check-cfg=cfg({}, values(\"optimized-c\"))",
-            fn_name
-        );
+        println!("cargo::rustc-check-cfg=cfg({fn_name}, values(\"optimized-c\"))",);
     }
 
     // Rustc is unaware of sparc target features, but this does show up from
diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs
index dc12d6996..14a478748 100644
--- a/compiler-builtins/src/mem/impls.rs
+++ b/compiler-builtins/src/mem/impls.rs
@@ -38,7 +38,7 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
     // is translated to memcpy in LLVM.
     let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
-    core::mem::transmute(x_read)
+    usize::from_ne_bytes(x_read)
 }
 
 /// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs
index 7e7d049b9..94ae31cf0 100644
--- a/libm/src/math/pow.rs
+++ b/libm/src/math/pow.rs
@@ -452,11 +452,7 @@ mod tests {
             } else {
                 pow(base, exponent) == expected
             },
-            "{} ** {} was {} instead of {}",
-            base,
-            exponent,
-            res,
-            expected
+            "{base} ** {exponent} was {res} instead of {expected}",
         );
     }
 
@@ -486,10 +482,7 @@ mod tests {
                     } else {
                         exp == res
                     },
-                    "test for {} was {} instead of {}",
-                    val,
-                    res,
-                    exp
+                    "test for {val} was {res} instead of {exp}",
                 );
             })
         });
diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
index 8094a7b84..4c866ef10 100644
--- a/libm/src/math/support/float_traits.rs
+++ b/libm/src/math/support/float_traits.rs
@@ -1,3 +1,5 @@
+#![allow(unknown_lints)] // FIXME(msrv) we shouldn't need this
+
 use core::{fmt, mem, ops};
 
 use super::int_traits::{CastFrom, Int, MinInt};
@@ -344,24 +346,28 @@ float_impl!(
 /* FIXME(msrv): vendor some things that are not const stable at our MSRV */
 
 /// `f32::from_bits`
+#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f32_from_bits(bits: u32) -> f32 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<u32, f32>(bits) }
 }
 
 /// `f32::to_bits`
+#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f32_to_bits(x: f32) -> u32 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<f32, u32>(x) }
 }
 
 /// `f64::from_bits`
+#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f64_from_bits(bits: u64) -> f64 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<u64, f64>(bits) }
 }
 
 /// `f64::to_bits`
+#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f64_to_bits(x: f64) -> u64 {
     // SAFETY: POD cast with no preconditions
     unsafe { mem::transmute::<f64, u64>(x) }

From 91963f59b3d2b03e947707da15ee4aa19e3d214d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Apr 2025 20:50:58 +0000
Subject: [PATCH 008/133] Move `fma` implementations to `mod generic`

This will not build correctly, the move is done as a separate step from
the rest of refactoring so git's history is cleaner.
---
 libm/src/math/{ => generic}/fma.rs      | 0
 libm/src/math/{ => generic}/fma_wide.rs | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename libm/src/math/{ => generic}/fma.rs (100%)
 rename libm/src/math/{ => generic}/fma_wide.rs (100%)

diff --git a/libm/src/math/fma.rs b/libm/src/math/generic/fma.rs
similarity index 100%
rename from libm/src/math/fma.rs
rename to libm/src/math/generic/fma.rs
diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/generic/fma_wide.rs
similarity index 100%
rename from libm/src/math/fma_wide.rs
rename to libm/src/math/generic/fma_wide.rs

From f456aa8baf0b108208332dc4bed63b6e70639b67 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Apr 2025 20:52:54 +0000
Subject: [PATCH 009/133] Refactor the fma modules

Move implementations to `generic/` like the other functions. This also
allows us to combine the `fma` and `fma_wide` modules.
---
 etc/function-definitions.json     |   2 +-
 libm/src/math/fma.rs              | 165 ++++++++++++++++++++++++++++++
 libm/src/math/generic/fma.rs      | 133 +-----------------------
 libm/src/math/generic/fma_wide.rs |  44 +-------
 libm/src/math/generic/mod.rs      |   4 +
 libm/src/math/mod.rs              |   6 +-
 6 files changed, 179 insertions(+), 175 deletions(-)
 create mode 100644 libm/src/math/fma.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 3e33343c4..9e5774eaf 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -350,7 +350,7 @@
     "fmaf": {
         "sources": [
             "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/fma_wide.rs"
+            "libm/src/math/fma.rs"
         ],
         "type": "f32"
     },
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
new file mode 100644
index 000000000..78f0f8992
--- /dev/null
+++ b/libm/src/math/fma.rs
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */
+
+use super::generic;
+use crate::support::Round;
+
+// Placeholder so we can have `fmaf16` in the `Float` trait.
+#[allow(unused)]
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
+    unimplemented!()
+}
+
+/// Floating multiply add (f32)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    select_implementation! {
+        name: fmaf,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
+    generic::fma_wide_round(x, y, z, Round::Nearest).val
+}
+
+/// Fused multiply add (f64)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    select_implementation! {
+        name: fma,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
+    generic::fma_round(x, y, z, Round::Nearest).val
+}
+
+/// Fused multiply add (f128)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg(f128_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
+    generic::fma_round(x, y, z, Round::Nearest).val
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status};
+
+    /// Test the generic `fma_round` algorithm for a given float.
+    fn spec_test<F>(f: impl Fn(F, F, F) -> F)
+    where
+        F: Float,
+        F: CastFrom<F::SignedInt>,
+        F: CastFrom<i8>,
+        F::Int: HInt,
+        u32: CastInto<F::Int>,
+    {
+        let x = F::from_bits(F::Int::ONE);
+        let y = F::from_bits(F::Int::ONE);
+        let z = F::ZERO;
+
+        // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
+        // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
+        // exact result"
+        assert_biteq!(f(x, y, z), F::ZERO);
+        assert_biteq!(f(x, -y, z), F::NEG_ZERO);
+        assert_biteq!(f(-x, y, z), F::NEG_ZERO);
+        assert_biteq!(f(-x, -y, z), F::ZERO);
+    }
+
+    #[test]
+    fn spec_test_f32() {
+        spec_test::<f32>(fmaf);
+
+        // Also do a small check that the non-widening version works for f32 (this should ideally
+        // get tested some more).
+        spec_test::<f32>(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val);
+    }
+
+    #[test]
+    fn spec_test_f64() {
+        spec_test::<f64>(fma);
+
+        let expect_underflow = [
+            (
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.ffffffffffffp-1023"),
+                hf64!("0x0.ffffffffffff8p-1022"),
+            ),
+            (
+                // FIXME: we raise underflow but this should only be inexact (based on C and
+                // `rustc_apfloat`).
+                hf64!("0x1.0p-1070"),
+                hf64!("0x1.0p-1070"),
+                hf64!("-0x1.0p-1022"),
+                hf64!("-0x1.0p-1022"),
+            ),
+        ];
+
+        for (x, y, z, res) in expect_underflow {
+            let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest);
+            assert_biteq!(val, res);
+            assert_eq!(status, Status::UNDERFLOW);
+        }
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_test_f128() {
+        spec_test::<f128>(fmaf128);
+    }
+
+    #[test]
+    fn issue_263() {
+        let a = f32::from_bits(1266679807);
+        let b = f32::from_bits(1300234242);
+        let c = f32::from_bits(1115553792);
+        let expected = f32::from_bits(1501560833);
+        assert_eq!(fmaf(a, b, c), expected);
+    }
+
+    #[test]
+    fn fma_segfault() {
+        // These two inputs cause fma to segfault on release due to overflow:
+        assert_eq!(
+            fma(
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313,
+                -0.0000000000000002220446049250313
+            ),
+            -0.00000000000000022204460492503126,
+        );
+
+        let result = fma(-0.992, -0.992, -0.992);
+        //force rounding to storage format on x87 to prevent superious errors.
+        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        let result = force_eval!(result);
+        assert_eq!(result, -0.007936000000000007,);
+    }
+
+    #[test]
+    fn fma_sbb() {
+        assert_eq!(
+            fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
+            -3991680619069439e277
+        );
+    }
+
+    #[test]
+    fn fma_underflow() {
+        assert_eq!(
+            fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
+            0.0,
+        );
+    }
+}
diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs
index 8856e63f5..aaf459d1b 100644
--- a/libm/src/math/generic/fma.rs
+++ b/libm/src/math/generic/fma.rs
@@ -1,31 +1,9 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
 
-use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
-use super::{CastFrom, CastInto, Float, Int, MinInt};
-
-/// Fused multiply add (f64)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fma(x: f64, y: f64, z: f64) -> f64 {
-    select_implementation! {
-        name: fma,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
-        args: x, y, z,
-    }
-
-    fma_round(x, y, z, Round::Nearest).val
-}
-
-/// Fused multiply add (f128)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
-    fma_round(x, y, z, Round::Nearest).val
-}
+use crate::support::{
+    CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status,
+};
 
 /// Fused multiply-add that works when there is not a larger float size available. Computes
 /// `(x * y) + z`.
@@ -234,7 +212,7 @@ where
     }
 
     // Use our exponent to scale the final value.
-    FpResult::new(super::generic::scalbn(r, e), status)
+    FpResult::new(super::scalbn(r, e), status)
 }
 
 /// Representation of `F` that has handled subnormals.
@@ -298,106 +276,3 @@ impl<F: Float> Norm<F> {
         self.e > Self::ZERO_INF_NAN as i32
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// Test the generic `fma_round` algorithm for a given float.
-    fn spec_test<F>()
-    where
-        F: Float,
-        F: CastFrom<F::SignedInt>,
-        F: CastFrom<i8>,
-        F::Int: HInt,
-        u32: CastInto<F::Int>,
-    {
-        let x = F::from_bits(F::Int::ONE);
-        let y = F::from_bits(F::Int::ONE);
-        let z = F::ZERO;
-
-        let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
-
-        // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
-        // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
-        // exact result"
-        assert_biteq!(fma(x, y, z), F::ZERO);
-        assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
-        assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
-        assert_biteq!(fma(-x, -y, z), F::ZERO);
-    }
-
-    #[test]
-    fn spec_test_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    fn spec_test_f64() {
-        spec_test::<f64>();
-
-        let expect_underflow = [
-            (
-                hf64!("0x1.0p-1070"),
-                hf64!("0x1.0p-1070"),
-                hf64!("0x1.ffffffffffffp-1023"),
-                hf64!("0x0.ffffffffffff8p-1022"),
-            ),
-            (
-                // FIXME: we raise underflow but this should only be inexact (based on C and
-                // `rustc_apfloat`).
-                hf64!("0x1.0p-1070"),
-                hf64!("0x1.0p-1070"),
-                hf64!("-0x1.0p-1022"),
-                hf64!("-0x1.0p-1022"),
-            ),
-        ];
-
-        for (x, y, z, res) in expect_underflow {
-            let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
-            assert_biteq!(val, res);
-            assert_eq!(status, Status::UNDERFLOW);
-        }
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_test_f128() {
-        spec_test::<f128>();
-    }
-
-    #[test]
-    fn fma_segfault() {
-        // These two inputs cause fma to segfault on release due to overflow:
-        assert_eq!(
-            fma(
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313,
-                -0.0000000000000002220446049250313
-            ),
-            -0.00000000000000022204460492503126,
-        );
-
-        let result = fma(-0.992, -0.992, -0.992);
-        //force rounding to storage format on x87 to prevent superious errors.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
-        let result = force_eval!(result);
-        assert_eq!(result, -0.007936000000000007,);
-    }
-
-    #[test]
-    fn fma_sbb() {
-        assert_eq!(
-            fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
-            -3991680619069439e277
-        );
-    }
-
-    #[test]
-    fn fma_underflow() {
-        assert_eq!(
-            fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
-            0.0,
-        );
-    }
-}
diff --git a/libm/src/math/generic/fma_wide.rs b/libm/src/math/generic/fma_wide.rs
index f268c2f14..a2ef59d3e 100644
--- a/libm/src/math/generic/fma_wide.rs
+++ b/libm/src/math/generic/fma_wide.rs
@@ -1,30 +1,6 @@
-/* SPDX-License-Identifier: MIT */
-/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
-
-use super::support::{FpResult, IntTy, Round, Status};
-use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
-
-// Placeholder so we can have `fmaf16` in the `Float` trait.
-#[allow(unused)]
-#[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
-    unimplemented!()
-}
-
-/// Floating multiply add (f32)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
-    select_implementation! {
-        name: fmaf,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
-        args: x, y, z,
-    }
-
-    fma_wide_round(x, y, z, Round::Nearest).val
-}
+use crate::support::{
+    CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status,
+};
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
@@ -95,17 +71,3 @@ where
 
     FpResult::ok(B::from_bits(ui).narrow())
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn issue_263() {
-        let a = f32::from_bits(1266679807);
-        let b = f32::from_bits(1300234242);
-        let c = f32::from_bits(1115553792);
-        let expected = f32::from_bits(1501560833);
-        assert_eq!(fmaf(a, b, c), expected);
-    }
-}
diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs
index 35846351a..9d497a03f 100644
--- a/libm/src/math/generic/mod.rs
+++ b/libm/src/math/generic/mod.rs
@@ -6,6 +6,8 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fma;
+mod fma_wide;
 mod fmax;
 mod fmaximum;
 mod fmaximum_num;
@@ -24,6 +26,8 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fma::fma_round;
+pub use fma_wide::fma_wide_round;
 pub use fmax::fmax;
 pub use fmaximum::fmaximum;
 pub use fmaximum_num::fmaximum_num;
diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
index 949c18b40..ce9b8fc58 100644
--- a/libm/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -159,7 +159,6 @@ mod fabs;
 mod fdim;
 mod floor;
 mod fma;
-mod fma_wide;
 mod fmin_fmax;
 mod fminimum_fmaximum;
 mod fminimum_fmaximum_num;
@@ -254,8 +253,7 @@ pub use self::expm1f::expm1f;
 pub use self::fabs::{fabs, fabsf};
 pub use self::fdim::{fdim, fdimf};
 pub use self::floor::{floor, floorf};
-pub use self::fma::fma;
-pub use self::fma_wide::fmaf;
+pub use self::fma::{fma, fmaf};
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
 pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
 pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
@@ -336,7 +334,7 @@ cfg_if! {
         // verify-sorted-end
 
         #[allow(unused_imports)]
-        pub(crate) use self::fma_wide::fmaf16;
+        pub(crate) use self::fma::fmaf16;
     }
 }
 

From 7ccb126f88de3330eb6472036e269c8b73c0b94e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 1 May 2025 18:52:12 +0000
Subject: [PATCH 010/133] builtins-test: Remove `no_mangle` from
 `eh_personality`

Rustc now mangles these symbols on its own, so `no_mangle` is rejected
as an error.
---
 builtins-test-intrinsics/src/main.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index 18f943eff..1fa7b0091 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -682,7 +682,6 @@ pub fn _Unwind_Resume() {}
 
 #[cfg(not(any(windows, target_os = "cygwin")))]
 #[lang = "eh_personality"]
-#[unsafe(no_mangle)]
 pub extern "C" fn eh_personality() {}
 
 #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin"))]

From 725484ee6758e7a647df30c4661e356504e222e6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 2 May 2025 00:46:21 +0000
Subject: [PATCH 011/133] fmaf: Add a test case from a MinGW failure

This is a known problem in the MinGW fmaf implementation, identified at
[1].  Make sure our implementation passes this edge case.

[1]: https://github.com/rust-lang/rust/issues/140515
---
 libm-test/src/generate/case_list.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs
index f1e6fcec3..43b28722f 100644
--- a/libm-test/src/generate/case_list.rs
+++ b/libm-test/src/generate/case_list.rs
@@ -269,7 +269,18 @@ fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
 }
 
 fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
-    vec![]
+    let mut v = vec![];
+    TestCase::append_pairs(
+        &mut v,
+        &[
+            // Known rounding error for some implementations (notably MinGW)
+            (
+                (-1.9369631e13f32, 2.1513551e-7, -1.7354427e-24),
+                Some(-4167095.8),
+            ),
+        ],
+    );
+    v
 }
 
 #[cfg(f128_enabled)]

From f83962ed9a1f850876860e04d552c5e43ed888f8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 1 May 2025 18:41:05 +0000
Subject: [PATCH 012/133] update-api-list: Match subdirectories within arch

---
 etc/update-api-list.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/etc/update-api-list.py b/etc/update-api-list.py
index 0770a8b20..28ff22f4c 100755
--- a/etc/update-api-list.py
+++ b/etc/update-api-list.py
@@ -123,7 +123,9 @@ def _init_defs(self, index: IndexTy) -> None:
 
         # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
         # string matching as a fallback.
-        for fname in glob("libm/src/math/arch/**.rs", root_dir=ROOT_DIR):
+        for fname in glob(
+            "libm/src/math/arch/**/*.rs", root_dir=ROOT_DIR, recursive=True
+        ):
             contents = (ROOT_DIR.joinpath(fname)).read_text()
 
             for name in self.public_functions:

From 6e4255a71258534d70167b8942c7bd8a5c0005dc Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 1 May 2025 17:49:56 +0000
Subject: [PATCH 013/133] Rename the i686 module to x86

This module is used for both i686 and x86-64.
---
 etc/function-definitions.json          | 4 ++--
 libm/src/math/arch/mod.rs              | 4 ++--
 libm/src/math/arch/{i686.rs => x86.rs} | 0
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename libm/src/math/arch/{i686.rs => x86.rs} (100%)

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 9e5774eaf..691205ddf 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -932,8 +932,8 @@
     "sqrt": {
         "sources": [
             "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/i686.rs",
             "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/x86.rs",
             "libm/src/math/generic/sqrt.rs",
             "libm/src/math/sqrt.rs"
         ],
@@ -942,8 +942,8 @@
     "sqrtf": {
         "sources": [
             "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/i686.rs",
             "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/x86.rs",
             "libm/src/math/generic/sqrt.rs",
             "libm/src/math/sqrt.rs"
         ],
diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs
index d9f2aad66..67352f90c 100644
--- a/libm/src/math/arch/mod.rs
+++ b/libm/src/math/arch/mod.rs
@@ -15,8 +15,8 @@ cfg_if! {
             ceil, ceilf, fabs, fabsf, floor, floorf, rint, rintf, sqrt, sqrtf, trunc, truncf,
         };
     } else if #[cfg(target_feature = "sse2")] {
-        mod i686;
-        pub use i686::{sqrt, sqrtf};
+        mod x86;
+        pub use x86::{sqrt, sqrtf};
     } else if #[cfg(all(
         any(target_arch = "aarch64", target_arch = "arm64ec"),
         target_feature = "neon"
diff --git a/libm/src/math/arch/i686.rs b/libm/src/math/arch/x86.rs
similarity index 100%
rename from libm/src/math/arch/i686.rs
rename to libm/src/math/arch/x86.rs

From a2f64407618e95bc8767d6dbabfa7ac0348f5bc8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Apr 2025 22:16:41 +0000
Subject: [PATCH 014/133] Use runtime feature detection for fma routines on x86

Get performance closer to the glibc implementations by adding assembly
fma routines, with runtime feature detection so they are used even if
not compiled with `+fma` (as the distributed standard library is often
not). Glibc uses ifuncs; this implementation stores a function pointer
in an atomic.

Detected CPU flags are also cached to avoid repeating the detection cost
when different functions are first called. The feature detection code
is a slightly simplified version of `std-detect`.

Musl sources were used as a reference [1].

Fixes: https://github.com/rust-lang/rust/issues/140452 once synced

[1]: https://github.com/bminor/musl/blob/c47ad25ea3b484e10326f933e927c0bc8cded3da/src/math/x32/fma.c
---
 etc/function-definitions.json           |   2 +
 libm/src/math/arch/mod.rs               |   2 +-
 libm/src/math/arch/x86.rs               |   5 +
 libm/src/math/arch/x86/detect.rs        | 229 ++++++++++++++++++++++++
 libm/src/math/arch/x86/fma.rs           | 134 ++++++++++++++
 libm/src/math/fma.rs                    |  10 +-
 libm/src/math/support/feature_detect.rs | 206 +++++++++++++++++++++
 libm/src/math/support/mod.rs            |   3 +
 8 files changed, 588 insertions(+), 3 deletions(-)
 create mode 100644 libm/src/math/arch/x86/detect.rs
 create mode 100644 libm/src/math/arch/x86/fma.rs
 create mode 100644 libm/src/math/support/feature_detect.rs

diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 691205ddf..4f796905b 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -343,6 +343,7 @@
     "fma": {
         "sources": [
             "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/x86/fma.rs",
             "libm/src/math/fma.rs"
         ],
         "type": "f64"
@@ -350,6 +351,7 @@
     "fmaf": {
         "sources": [
             "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/x86/fma.rs",
             "libm/src/math/fma.rs"
         ],
         "type": "f32"
diff --git a/libm/src/math/arch/mod.rs b/libm/src/math/arch/mod.rs
index 67352f90c..984ae7f31 100644
--- a/libm/src/math/arch/mod.rs
+++ b/libm/src/math/arch/mod.rs
@@ -16,7 +16,7 @@ cfg_if! {
         };
     } else if #[cfg(target_feature = "sse2")] {
         mod x86;
-        pub use x86::{sqrt, sqrtf};
+        pub use x86::{sqrt, sqrtf, fma, fmaf};
     } else if #[cfg(all(
         any(target_arch = "aarch64", target_arch = "arm64ec"),
         target_feature = "neon"
diff --git a/libm/src/math/arch/x86.rs b/libm/src/math/arch/x86.rs
index 3e1d19bfa..454aa2850 100644
--- a/libm/src/math/arch/x86.rs
+++ b/libm/src/math/arch/x86.rs
@@ -1,5 +1,10 @@
 //! Architecture-specific support for x86-32 and x86-64 with SSE2
 
+mod detect;
+mod fma;
+
+pub use fma::{fma, fmaf};
+
 pub fn sqrtf(mut x: f32) -> f32 {
     // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory
     // access or side effects.
diff --git a/libm/src/math/arch/x86/detect.rs b/libm/src/math/arch/x86/detect.rs
new file mode 100644
index 000000000..71c3281dc
--- /dev/null
+++ b/libm/src/math/arch/x86/detect.rs
@@ -0,0 +1,229 @@
+#[cfg(target_arch = "x86")]
+use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
+
+use crate::support::{Flags, get_or_init_flags_cache};
+
+/// CPU features that get cached (doesn't correlate to anything on the CPU).
+pub mod cpu_flags {
+    use crate::support::unique_masks;
+
+    unique_masks! {
+        u32,
+        SSE3,
+        F16C,
+        SSE,
+        SSE2,
+        ERMSB,
+        MOVRS,
+        FMA,
+        FMA4,
+        AVX512FP16,
+        AVX512BF16,
+    }
+}
+
+/// Get CPU features, loading from a cache if available.
+pub fn get_cpu_features() -> Flags {
+    use core::sync::atomic::AtomicU32;
+    static CACHE: AtomicU32 = AtomicU32::new(0);
+    get_or_init_flags_cache(&CACHE, load_x86_features)
+}
+
+/// Read from cpuid and translate to a `Flags` instance, using `cpu_flags`.
+///
+/// Implementation is taken from [std-detect][std-detect].
+///
+/// [std-detect]: https://github.com/rust-lang/stdarch/blob/690b3a6334d482874163bd6fcef408e0518febe9/crates/std_detect/src/detect/os/x86.rs#L142
+fn load_x86_features() -> Flags {
+    let mut value = Flags::empty();
+
+    if cfg!(target_env = "sgx") {
+        // doesn't support this because it is untrusted data
+        return Flags::empty();
+    }
+
+    // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
+    // has `cpuid` support.
+
+    // 0. EAX = 0: Basic Information:
+    // - EAX returns the "Highest Function Parameter", that is, the maximum leaf
+    //   value for subsequent calls of `cpuinfo` in range [0, 0x8000_0000].
+    // - The vendor ID is stored in 12 u8 ascii chars, returned in EBX, EDX, and ECX
+    //   (in that order)
+    let mut vendor_id = [0u8; 12];
+    let max_basic_leaf;
+    unsafe {
+        let CpuidResult { eax, ebx, ecx, edx } = __cpuid(0);
+        max_basic_leaf = eax;
+        vendor_id[0..4].copy_from_slice(&ebx.to_ne_bytes());
+        vendor_id[4..8].copy_from_slice(&edx.to_ne_bytes());
+        vendor_id[8..12].copy_from_slice(&ecx.to_ne_bytes());
+    }
+
+    if max_basic_leaf < 1 {
+        // Earlier Intel 486, CPUID not implemented
+        return value;
+    }
+
+    // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
+    // Contains information about most x86 features.
+    let CpuidResult { ecx, edx, .. } = unsafe { __cpuid(0x0000_0001_u32) };
+    let proc_info_ecx = Flags::from_bits(ecx);
+    let proc_info_edx = Flags::from_bits(edx);
+
+    // EAX = 7: Queries "Extended Features";
+    // Contains information about bmi,bmi2, and avx2 support.
+    let mut extended_features_ebx = Flags::empty();
+    let mut extended_features_edx = Flags::empty();
+    let mut extended_features_eax_leaf_1 = Flags::empty();
+    if max_basic_leaf >= 7 {
+        let CpuidResult { ebx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
+        extended_features_ebx = Flags::from_bits(ebx);
+        extended_features_edx = Flags::from_bits(edx);
+
+        let CpuidResult { eax, .. } = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
+        extended_features_eax_leaf_1 = Flags::from_bits(eax)
+    }
+
+    // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
+    // - EAX returns the max leaf value for extended information, that is,
+    //   `cpuid` calls in range [0x8000_0000; u32::MAX]:
+    let extended_max_basic_leaf = unsafe { __cpuid(0x8000_0000_u32) }.eax;
+
+    // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature Bits"
+    let mut extended_proc_info_ecx = Flags::empty();
+    if extended_max_basic_leaf >= 1 {
+        let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
+        extended_proc_info_ecx = Flags::from_bits(ecx);
+    }
+
+    let mut enable = |regflags: Flags, regbit, flag| {
+        if regflags.test_nth(regbit) {
+            value.insert(flag);
+        }
+    };
+
+    enable(proc_info_ecx, 0, cpu_flags::SSE3);
+    enable(proc_info_ecx, 29, cpu_flags::F16C);
+    enable(proc_info_edx, 25, cpu_flags::SSE);
+    enable(proc_info_edx, 26, cpu_flags::SSE2);
+    enable(extended_features_ebx, 9, cpu_flags::ERMSB);
+    enable(extended_features_eax_leaf_1, 31, cpu_flags::MOVRS);
+
+    // `XSAVE` and `AVX` support:
+    let cpu_xsave = proc_info_ecx.test_nth(26);
+    if cpu_xsave {
+        // 0. Here the CPU supports `XSAVE`.
+
+        // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
+        //    supports saving the state of the AVX/AVX2 vector registers on
+        //    context-switches, see:
+        //
+        // - [intel: is avx enabled?][is_avx_enabled],
+        // - [mozilla: sse.cpp][mozilla_sse_cpp].
+        //
+        // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+        // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
+        let cpu_osxsave = proc_info_ecx.test_nth(27);
+
+        if cpu_osxsave {
+            // 2. The OS must have signaled the CPU that it supports saving and
+            // restoring the:
+            //
+            // * SSE -> `XCR0.SSE[1]`
+            // * AVX -> `XCR0.AVX[2]`
+            // * AVX-512 -> `XCR0.AVX-512[7:5]`.
+            // * AMX -> `XCR0.AMX[18:17]`
+            //
+            // by setting the corresponding bits of `XCR0` to `1`.
+            //
+            // This is safe because the CPU supports `xsave` and the OS has set `osxsave`.
+            let xcr0 = unsafe { _xgetbv(0) };
+            // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
+            let os_avx_support = xcr0 & 6 == 6;
+            // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`:
+            let os_avx512_support = xcr0 & 0xe0 == 0xe0;
+
+            // Only if the OS and the CPU support saving/restoring the AVX
+            // registers we enable `xsave` support:
+            if os_avx_support {
+                // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
+                // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
+                // Developer’s Manual, Volume 1: Basic Architecture":
+                //
+                // "Software enables the XSAVE feature set by setting
+                // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
+                // instruction). If this bit is 0, execution of any of XGETBV,
+                // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
+                // causes an invalid-opcode exception (#UD)"
+
+                // FMA (uses 256-bit wide registers):
+                enable(proc_info_ecx, 12, cpu_flags::FMA);
+
+                // For AVX-512 the OS also needs to support saving/restoring
+                // the extended state, only then we enable AVX-512 support:
+                if os_avx512_support {
+                    enable(extended_features_edx, 23, cpu_flags::AVX512FP16);
+                    enable(extended_features_eax_leaf_1, 5, cpu_flags::AVX512BF16);
+                }
+            }
+        }
+    }
+
+    // As Hygon Dhyana originates from AMD technology and shares most of the architecture with
+    // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series number
+    // (Family 18h).
+    //
+    // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD
+    // family 17h.
+    //
+    // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf
+    // (AMD64 Architecture Programmer's Manual, Appendix E).
+    // Related Hygon kernel patch can be found on
+    // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn
+    if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" {
+        // These features are available on AMD arch CPUs:
+        enable(extended_proc_info_ecx, 16, cpu_flags::FMA4);
+    }
+
+    value
+}
+
+#[cfg(test)]
+mod tests {
+    extern crate std;
+    use std::is_x86_feature_detected;
+
+    use super::*;
+
+    #[test]
+    fn check_matches_std() {
+        let features = get_cpu_features();
+        for i in 0..cpu_flags::ALL.len() {
+            let flag = cpu_flags::ALL[i];
+            let name = cpu_flags::NAMES[i];
+
+            let std_detected = match flag {
+                cpu_flags::SSE3 => is_x86_feature_detected!("sse3"),
+                cpu_flags::F16C => is_x86_feature_detected!("f16c"),
+                cpu_flags::SSE => is_x86_feature_detected!("sse"),
+                cpu_flags::SSE2 => is_x86_feature_detected!("sse2"),
+                cpu_flags::ERMSB => is_x86_feature_detected!("ermsb"),
+                cpu_flags::MOVRS => continue, // only very recent support in std
+                cpu_flags::FMA => is_x86_feature_detected!("fma"),
+                cpu_flags::FMA4 => continue, // not yet supported in std
+                cpu_flags::AVX512FP16 => is_x86_feature_detected!("avx512fp16"),
+                cpu_flags::AVX512BF16 => is_x86_feature_detected!("avx512bf16"),
+                _ => panic!("untested CPU flag {name}"),
+            };
+
+            assert_eq!(
+                std_detected,
+                features.contains(flag),
+                "different flag {name}. flags: {features:?}"
+            );
+        }
+    }
+}
diff --git a/libm/src/math/arch/x86/fma.rs b/libm/src/math/arch/x86/fma.rs
new file mode 100644
index 000000000..eb43f4696
--- /dev/null
+++ b/libm/src/math/arch/x86/fma.rs
@@ -0,0 +1,134 @@
+//! Use assembly fma if the `fma` or `fma4` feature is detected at runtime.
+
+use core::arch::asm;
+
+use super::super::super::generic;
+use super::detect::{cpu_flags, get_cpu_features};
+use crate::support::{Round, select_once};
+
+pub fn fma(x: f64, y: f64, z: f64) -> f64 {
+    select_once! {
+        sig: fn(x: f64, y: f64, z: f64) -> f64,
+        init: || {
+            let features = get_cpu_features();
+            if features.contains(cpu_flags::FMA) {
+                fma_with_fma
+            } else if features.contains(cpu_flags::FMA4) {
+               fma_with_fma4
+            } else {
+                fma_fallback as Func
+            }
+        },
+        // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
+        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
+    }
+}
+
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    select_once! {
+        sig: fn(x: f32, y: f32, z: f32) -> f32,
+        init: || {
+            let features = get_cpu_features();
+            if features.contains(cpu_flags::FMA) {
+                fmaf_with_fma
+            } else if features.contains(cpu_flags::FMA4) {
+                fmaf_with_fma4
+            } else {
+                fmaf_fallback as Func
+            }
+        },
+        // SAFETY: `fn_ptr` is the result of `init`, preconditions have been checked.
+        call: |fn_ptr: Func| unsafe { fn_ptr(x, y, z) },
+    }
+}
+
+/// # Safety
+///
+/// Must have +fma available.
+unsafe fn fma_with_fma(mut x: f64, y: f64, z: f64) -> f64 {
+    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
+
+    // SAFETY: fma is asserted available by precondition, which provides the instruction. No
+    // memory access or side effects.
+    unsafe {
+        asm!(
+            "vfmadd213sd {x}, {y}, {z}",
+            x = inout(xmm_reg) x,
+            y = in(xmm_reg) y,
+            z = in(xmm_reg) z,
+            options(nostack, nomem, pure),
+        );
+    }
+    x
+}
+
+/// # Safety
+///
+/// Must have +fma available.
+unsafe fn fmaf_with_fma(mut x: f32, y: f32, z: f32) -> f32 {
+    debug_assert!(get_cpu_features().contains(cpu_flags::FMA));
+
+    // SAFETY: fma is asserted available by precondition, which provides the instruction. No
+    // memory access or side effects.
+    unsafe {
+        asm!(
+            "vfmadd213ss {x}, {y}, {z}",
+            x = inout(xmm_reg) x,
+            y = in(xmm_reg) y,
+            z = in(xmm_reg) z,
+            options(nostack, nomem, pure),
+        );
+    }
+    x
+}
+
+/// # Safety
+///
+/// Must have +fma4 available.
+unsafe fn fma_with_fma4(mut x: f64, y: f64, z: f64) -> f64 {
+    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
+
+    // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
+    // memory access or side effects.
+    unsafe {
+        asm!(
+            "vfmaddsd {x}, {x}, {y}, {z}",
+            x = inout(xmm_reg) x,
+            y = in(xmm_reg) y,
+            z = in(xmm_reg) z,
+            options(nostack, nomem, pure),
+        );
+    }
+    x
+}
+
+/// # Safety
+///
+/// Must have +fma4 available.
+unsafe fn fmaf_with_fma4(mut x: f32, y: f32, z: f32) -> f32 {
+    debug_assert!(get_cpu_features().contains(cpu_flags::FMA4));
+
+    // SAFETY: fma4 is asserted available by precondition, which provides the instruction. No
+    // memory access or side effects.
+    unsafe {
+        asm!(
+            "vfmaddss {x}, {x}, {y}, {z}",
+            x = inout(xmm_reg) x,
+            y = in(xmm_reg) y,
+            z = in(xmm_reg) z,
+            options(nostack, nomem, pure),
+        );
+    }
+    x
+}
+
+// FIXME: the `select_implementation` macro should handle arch implementations that want
+// to use the fallback, so we don't need to recreate the body.
+
+fn fma_fallback(x: f64, y: f64, z: f64) -> f64 {
+    generic::fma_round(x, y, z, Round::Nearest).val
+}
+
+fn fmaf_fallback(x: f32, y: f32, z: f32) -> f32 {
+    generic::fma_wide_round(x, y, z, Round::Nearest).val
+}
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
index 78f0f8992..5bf473cfe 100644
--- a/libm/src/math/fma.rs
+++ b/libm/src/math/fma.rs
@@ -19,7 +19,10 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
     select_implementation! {
         name: fmaf,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }
 
@@ -33,7 +36,10 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_implementation! {
         name: fma,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        use_arch: any(
+            all(target_arch = "aarch64", target_feature = "neon"),
+            target_feature = "sse2",
+        ),
         args: x, y, z,
     }
 
diff --git a/libm/src/math/support/feature_detect.rs b/libm/src/math/support/feature_detect.rs
new file mode 100644
index 000000000..cb669b073
--- /dev/null
+++ b/libm/src/math/support/feature_detect.rs
@@ -0,0 +1,206 @@
+//! Helpers for runtime target feature detection that are shared across architectures.
+
+use core::sync::atomic::{AtomicU32, Ordering};
+
+/// Given a list of identifiers, assign each one a unique sequential single-bit mask.
+#[allow(unused_macros)]
+macro_rules! unique_masks {
+    ($ty:ty, $($name:ident,)+) => {
+        #[cfg(test)]
+        pub const ALL: &[$ty] = &[$($name),+];
+        #[cfg(test)]
+        pub const NAMES: &[&str] = &[$(stringify!($name)),+];
+
+        unique_masks!(@one; $ty; 0; $($name,)+);
+    };
+    // Matcher for a single value
+    (@one; $_ty:ty; $_idx:expr;) => {};
+    (@one; $ty:ty; $shift:expr; $name:ident, $($tail:tt)*) => {
+        pub const $name: $ty = 1 << $shift;
+        // Ensure the top bit is not used since it stores initialized state.
+        const _: () = assert!($name != (1 << (<$ty>::BITS - 1)));
+        // Increment the shift and invoke the next
+        unique_masks!(@one; $ty; $shift + 1; $($tail)*);
+    };
+}
+
+/// Call `init` once to choose an implementation, then use it for the rest of the program.
+///
+/// - `sig` is the function type.
+/// - `init` is an expression called at startup that chooses an implementation and returns a
+///   function pointer.
+/// - `call` is an expression to call a function returned by `init`, encapsulating any safety
+///   preconditions.
+///
+/// The type `Func` is available in `init` and `call`.
+///
+/// This is effectively our version of an ifunc without linker support. Note that `init` may be
+/// called more than once until one completes.
+#[allow(unused_macros)] // only used on some architectures
+macro_rules! select_once {
+    (
+        sig: fn($($arg:ident: $ArgTy:ty),*) -> $RetTy:ty,
+        init: $init:expr,
+        call: $call:expr,
+    ) => {{
+        use core::mem;
+        use core::sync::atomic::{AtomicPtr, Ordering};
+
+        type Func = unsafe fn($($arg: $ArgTy),*) -> $RetTy;
+
+        /// Stores a pointer that is immediately jumped to. By default it is an init function
+        /// that sets FUNC to something else.
+        static FUNC: AtomicPtr<()> = AtomicPtr::new((initializer as Func) as *mut ());
+
+        /// Run once to set the function that will be used for all subsequent calls.
+        fn initializer($($arg: $ArgTy),*) -> $RetTy {
+            // Select an implementation, ensuring a 'static lifetime.
+            let fn_ptr: Func = $init();
+            FUNC.store(fn_ptr as *mut (), Ordering::Relaxed);
+
+            // Forward the call to the selected function.
+            $call(fn_ptr)
+        }
+
+        let raw: *mut () = FUNC.load(Ordering::Relaxed);
+
+        // SAFETY: will only ever be `initializer` or another function pointer that has the
+        // 'static lifetime.
+        let fn_ptr: Func = unsafe { mem::transmute::<*mut (), Func>(raw) };
+
+        $call(fn_ptr)
+    }}
+}
+
+pub(crate) use {select_once, unique_masks};
+
+use crate::support::cold_path;
+
+/// Helper for working with bit flags, based on `bitflags`.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Flags(u32);
+
+#[allow(dead_code)] // only used on some architectures
+impl Flags {
+    /// No bits set.
+    pub const fn empty() -> Self {
+        Self(0)
+    }
+
+    /// Create with bits already set.
+    pub const fn from_bits(val: u32) -> Self {
+        Self(val)
+    }
+
+    /// Get the integer representation.
+    pub fn bits(&self) -> u32 {
+        self.0
+    }
+
+    /// Set any bits in `mask`.
+    pub fn insert(&mut self, mask: u32) {
+        self.0 |= mask;
+    }
+
+    /// Check whether the mask is set.
+    pub fn contains(&self, mask: u32) -> bool {
+        self.0 & mask == mask
+    }
+
+    /// Check whether the nth bit is set.
+    pub fn test_nth(&self, bit: u32) -> bool {
+        debug_assert!(bit < u32::BITS, "bit index out-of-bounds");
+        self.0 & (1 << bit) != 0
+    }
+}
+
+/// Load flags from an atomic value. If the flags have not yet been initialized, call `init`
+/// to do so.
+///
+/// Note that `init` may run more than once.
+#[allow(dead_code)] // only used on some architectures
+pub fn get_or_init_flags_cache(cache: &AtomicU32, init: impl FnOnce() -> Flags) -> Flags {
+    // The top bit is used to indicate that the values have already been set once.
+    const INITIALIZED: u32 = 1 << 31;
+
+    // Relaxed ops are sufficient since the result should always be the same.
+    let mut flags = Flags::from_bits(cache.load(Ordering::Relaxed));
+
+    if !flags.contains(INITIALIZED) {
+        // Without this, `init` is inlined and the bit check gets wrapped in `init`'s lengthy
+        // prologue/epilogue. Cold pathing gives a preferable load->test->?jmp->ret.
+        cold_path();
+
+        flags = init();
+        debug_assert!(
+            !flags.contains(INITIALIZED),
+            "initialized bit shouldn't be set"
+        );
+        flags.insert(INITIALIZED);
+        cache.store(flags.bits(), Ordering::Relaxed);
+    }
+
+    flags
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn unique_masks() {
+        unique_masks! {
+            u32,
+            V0,
+            V1,
+            V2,
+        }
+        assert_eq!(V0, 1u32 << 0);
+        assert_eq!(V1, 1u32 << 1);
+        assert_eq!(V2, 1u32 << 2);
+        assert_eq!(ALL, [V0, V1, V2]);
+        assert_eq!(NAMES, ["V0", "V1", "V2"]);
+    }
+
+    #[test]
+    fn flag_cache_is_used() {
+        // Sanity check that flags are only ever set once
+        static CACHE: AtomicU32 = AtomicU32::new(0);
+
+        let mut f1 = Flags::from_bits(0x1);
+        let f2 = Flags::from_bits(0x2);
+
+        let r1 = get_or_init_flags_cache(&CACHE, || f1);
+        let r2 = get_or_init_flags_cache(&CACHE, || f2);
+
+        f1.insert(1 << 31); // init bit
+
+        assert_eq!(r1, f1);
+        assert_eq!(r2, f1);
+    }
+
+    #[test]
+    fn select_cache_is_used() {
+        // Sanity check that cache is used
+        static CALLED: AtomicU32 = AtomicU32::new(0);
+
+        fn inner() {
+            fn nop() {}
+
+            select_once! {
+                sig: fn() -> (),
+                init: || {
+                    CALLED.fetch_add(1, Ordering::Relaxed);
+                    nop
+                },
+                call: |fn_ptr: Func| unsafe { fn_ptr() },
+            }
+        }
+
+        // `init` should only have been called once.
+        inner();
+        assert_eq!(CALLED.load(Ordering::Relaxed), 1);
+        inner();
+        assert_eq!(CALLED.load(Ordering::Relaxed), 1);
+    }
+}
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index ee3f2bbdf..727b9a360 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -2,6 +2,7 @@
 pub mod macros;
 mod big;
 mod env;
+mod feature_detect;
 mod float_traits;
 pub mod hex_float;
 mod int_traits;
@@ -10,6 +11,8 @@ mod int_traits;
 pub use big::{i256, u256};
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
+pub(crate) use feature_detect::{Flags, get_or_init_flags_cache, select_once, unique_masks};
+#[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]

From 257dd4808950ec85ed9ecffb8116c550079684f7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 3 May 2025 18:18:20 +0000
Subject: [PATCH 015/133] chore: release

---
 compiler-builtins/CHANGELOG.md | 6 ++++++
 compiler-builtins/Cargo.toml   | 2 +-
 libm/CHANGELOG.md              | 6 ++++++
 libm/Cargo.toml                | 2 +-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md
index 34b413a86..f152c2c2c 100644
--- a/compiler-builtins/CHANGELOG.md
+++ b/compiler-builtins/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03
+
+### Other
+
+- Use runtime feature detection for fma routines on x86
+
 ## [0.1.156](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.155...compiler_builtins-v0.1.156) - 2025-04-21
 
 ### Other
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index acbace687..784563777 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
-version = "0.1.156"
+version = "0.1.157"
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md
index 292561f86..a0217af09 100644
--- a/libm/CHANGELOG.md
+++ b/libm/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to
 
 ## [Unreleased]
 
+## [0.2.14](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.13...libm-v0.2.14) - 2025-05-03
+
+### Other
+
+- Use runtime feature detection for fma routines on x86
+
 ## [0.2.13](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.12...libm-v0.2.13) - 2025-04-21
 
 ### Fixed
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index f80715ff6..76c9a73bc 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -8,7 +8,7 @@ license = "MIT"
 name = "libm"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
-version = "0.2.13"
+version = "0.2.14"
 edition = "2021"
 rust-version = "1.63"
 

From 8e78411a4fdb83640ea6091aefb3e8e99c9320cd Mon Sep 17 00:00:00 2001
From: ELginas <gintaras.z123@yahoo.com>
Date: Mon, 28 Apr 2025 17:17:11 +0300
Subject: [PATCH 016/133] docs: fix typo in Cargo.toml

Initially introduced in 63ccaf11f08fb5d0b39cc33884c5a1a63f547ace

Signed-off-by: ELginas <gintaras.z123@yahoo.com>
---
 compiler-builtins/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 784563777..d9eebcfc8 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -35,7 +35,7 @@ default = ["compiler-builtins"]
 c = ["dep:cc"]
 
 # Workaround for the Cranelift codegen backend. Disables any implementations
-# which use inline assembly and fall back to pure Rust versions (if avalible).
+# which use inline assembly and fall back to pure Rust versions (if available).
 no-asm = []
 
 # Workaround for codegen backends which haven't yet implemented `f16` and

From 6d78c1acc995b9093365588e094a5defacd611e4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 5 May 2025 05:40:54 +0000
Subject: [PATCH 017/133] Replace `super::super` with `crate::support` where
 possible

Since `crate::support` now works in both `compiler-builtins` and `libm`,
we can get rid of some of these unusual paths.
---
 libm/src/math/generic/ceil.rs         | 3 +--
 libm/src/math/generic/copysign.rs     | 2 +-
 libm/src/math/generic/fabs.rs         | 2 +-
 libm/src/math/generic/fdim.rs         | 2 +-
 libm/src/math/generic/floor.rs        | 3 +--
 libm/src/math/generic/fmax.rs         | 2 +-
 libm/src/math/generic/fmaximum.rs     | 2 +-
 libm/src/math/generic/fmaximum_num.rs | 2 +-
 libm/src/math/generic/fmin.rs         | 2 +-
 libm/src/math/generic/fminimum.rs     | 2 +-
 libm/src/math/generic/fminimum_num.rs | 2 +-
 libm/src/math/generic/fmod.rs         | 2 +-
 libm/src/math/generic/rint.rs         | 3 +--
 libm/src/math/generic/round.rs        | 2 +-
 libm/src/math/generic/scalbn.rs       | 2 +-
 libm/src/math/generic/sqrt.rs         | 5 +++--
 libm/src/math/generic/trunc.rs        | 3 +--
 17 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/libm/src/math/generic/ceil.rs b/libm/src/math/generic/ceil.rs
index 499770c0d..1072ba7c2 100644
--- a/libm/src/math/generic/ceil.rs
+++ b/libm/src/math/generic/ceil.rs
@@ -7,8 +7,7 @@
 //! performance seems to be better (based on icount) and it does not seem to experience rounding
 //! errors on i386.
 
-use super::super::support::{FpResult, Status};
-use super::super::{Float, Int, IntTy, MinInt};
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
 #[inline]
 pub fn ceil<F: Float>(x: F) -> F {
diff --git a/libm/src/math/generic/copysign.rs b/libm/src/math/generic/copysign.rs
index a61af22f0..da9ce3878 100644
--- a/libm/src/math/generic/copysign.rs
+++ b/libm/src/math/generic/copysign.rs
@@ -1,4 +1,4 @@
-use super::super::Float;
+use crate::support::Float;
 
 /// Copy the sign of `y` to `x`.
 #[inline]
diff --git a/libm/src/math/generic/fabs.rs b/libm/src/math/generic/fabs.rs
index 0fa0edf9b..0adfa57d9 100644
--- a/libm/src/math/generic/fabs.rs
+++ b/libm/src/math/generic/fabs.rs
@@ -1,4 +1,4 @@
-use super::super::Float;
+use crate::support::Float;
 
 /// Absolute value.
 #[inline]
diff --git a/libm/src/math/generic/fdim.rs b/libm/src/math/generic/fdim.rs
index a63007b19..289e5fd96 100644
--- a/libm/src/math/generic/fdim.rs
+++ b/libm/src/math/generic/fdim.rs
@@ -1,4 +1,4 @@
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fdim<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/floor.rs b/libm/src/math/generic/floor.rs
index 58d1ee4c2..e6dfd8866 100644
--- a/libm/src/math/generic/floor.rs
+++ b/libm/src/math/generic/floor.rs
@@ -7,8 +7,7 @@
 //! performance seems to be better (based on icount) and it does not seem to experience rounding
 //! errors on i386.
 
-use super::super::support::{FpResult, Status};
-use super::super::{Float, Int, IntTy, MinInt};
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
 #[inline]
 pub fn floor<F: Float>(x: F) -> F {
diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs
index bf3f847e8..54207e4b3 100644
--- a/libm/src/math/generic/fmax.rs
+++ b/libm/src/math/generic/fmax.rs
@@ -14,7 +14,7 @@
 //!
 //! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
index 387055af2..4b6295bc0 100644
--- a/libm/src/math/generic/fmaximum.rs
+++ b/libm/src/math/generic/fmaximum.rs
@@ -9,7 +9,7 @@
 //!
 //! Excluded from our implementation is sNaN handling.
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fmaximum<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
index f7efdde80..2e97ff6d3 100644
--- a/libm/src/math/generic/fmaximum_num.rs
+++ b/libm/src/math/generic/fmaximum_num.rs
@@ -11,7 +11,7 @@
 //!
 //! Excluded from our implementation is sNaN handling.
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs
index cd3caeee4..0f86364d2 100644
--- a/libm/src/math/generic/fmin.rs
+++ b/libm/src/math/generic/fmin.rs
@@ -14,7 +14,7 @@
 //!
 //! [link]: https://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
index 4ddb36455..9dc0b64be 100644
--- a/libm/src/math/generic/fminimum.rs
+++ b/libm/src/math/generic/fminimum.rs
@@ -9,7 +9,7 @@
 //!
 //! Excluded from our implementation is sNaN handling.
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fminimum<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
index 441c204a9..40db8b189 100644
--- a/libm/src/math/generic/fminimum_num.rs
+++ b/libm/src/math/generic/fminimum_num.rs
@@ -11,7 +11,7 @@
 //!
 //! Excluded from our implementation is sNaN handling.
 
-use super::super::Float;
+use crate::support::Float;
 
 #[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/fmod.rs b/libm/src/math/generic/fmod.rs
index e9898012f..29acc8a4d 100644
--- a/libm/src/math/generic/fmod.rs
+++ b/libm/src/math/generic/fmod.rs
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: MIT OR Apache-2.0 */
-use super::super::{CastFrom, Float, Int, MinInt};
+use crate::support::{CastFrom, Float, Int, MinInt};
 
 #[inline]
 pub fn fmod<F: Float>(x: F, y: F) -> F {
diff --git a/libm/src/math/generic/rint.rs b/libm/src/math/generic/rint.rs
index 7bf38e323..c5bc27d3d 100644
--- a/libm/src/math/generic/rint.rs
+++ b/libm/src/math/generic/rint.rs
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/rint.c */
 
-use super::super::Float;
-use super::super::support::{FpResult, Round};
+use crate::support::{Float, FpResult, Round};
 
 /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
 /// applicable.
diff --git a/libm/src/math/generic/round.rs b/libm/src/math/generic/round.rs
index 01314ac70..16739f01d 100644
--- a/libm/src/math/generic/round.rs
+++ b/libm/src/math/generic/round.rs
@@ -1,5 +1,5 @@
-use super::super::{Float, MinInt};
 use super::{copysign, trunc};
+use crate::support::{Float, MinInt};
 
 #[inline]
 pub fn round<F: Float>(x: F) -> F {
diff --git a/libm/src/math/generic/scalbn.rs b/libm/src/math/generic/scalbn.rs
index a45db1b4a..6dd9b1a9b 100644
--- a/libm/src/math/generic/scalbn.rs
+++ b/libm/src/math/generic/scalbn.rs
@@ -1,4 +1,4 @@
-use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
+use crate::support::{CastFrom, CastInto, Float, IntTy, MinInt};
 
 /// Scale the exponent.
 ///
diff --git a/libm/src/math/generic/sqrt.rs b/libm/src/math/generic/sqrt.rs
index c52560bdb..9481c4cdb 100644
--- a/libm/src/math/generic/sqrt.rs
+++ b/libm/src/math/generic/sqrt.rs
@@ -41,8 +41,9 @@
 //! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
 //! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
 
-use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
-use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
+use crate::support::{
+    CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, cold_path,
+};
 
 #[inline]
 pub fn sqrt<F>(x: F) -> F
diff --git a/libm/src/math/generic/trunc.rs b/libm/src/math/generic/trunc.rs
index 29a28f47b..d5b444d15 100644
--- a/libm/src/math/generic/trunc.rs
+++ b/libm/src/math/generic/trunc.rs
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: MIT
  * origin: musl src/math/trunc.c */
 
-use super::super::support::{FpResult, Status};
-use super::super::{Float, Int, IntTy, MinInt};
+use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
 #[inline]
 pub fn trunc<F: Float>(x: F) -> F {

From 1b1b2ed16eac475b6e974ce0cb16e76a9d9c10fa Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 5 May 2025 05:43:58 +0000
Subject: [PATCH 018/133] ci: Mention `ci: skip-extensive` in the error message

---
 ci/ci-util.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 7486d6b41..d785b2e9e 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -261,7 +261,9 @@ def emit_workflow_output(self):
         if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
             eprint(
                 f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
-                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is intentional"
+                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is"
+                " intentional. If this is refactoring that happens to touch a lot of"
+                f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead."
             )
             exit(1)
 

From da8b5829f44f9fe04dfac7cafde8310e0ed0a429 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 6 May 2025 20:11:48 +0000
Subject: [PATCH 019/133] Require `target_has_atomic = "ptr"` for runtime
 feature detection

The `feature_detect` module is currently being built on all targets, but
the use of `AtomicU32` causes a problem if atomics are not available
(such as with `bpfel-unknown-none`). Gate this module behind
`target_has_atomic = "ptr"`.

The below now completes successfully:

    cargo build -p compiler_builtins --target=bpfel-unknown-none -Z build-std=core

Fixes: https://github.com/rust-lang/compiler-builtins/issues/908
---
 libm/src/math/arch/x86/detect.rs        | 7 +++++--
 libm/src/math/arch/x86/fma.rs           | 3 ++-
 libm/src/math/support/feature_detect.rs | 5 +++++
 libm/src/math/support/mod.rs            | 6 +++---
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/libm/src/math/arch/x86/detect.rs b/libm/src/math/arch/x86/detect.rs
index 71c3281dc..e6d9b040b 100644
--- a/libm/src/math/arch/x86/detect.rs
+++ b/libm/src/math/arch/x86/detect.rs
@@ -1,13 +1,16 @@
+// Using runtime feature detection requires atomics. Currently there are no x86 targets
+// that support sse but not `AtomicPtr`.
+
 #[cfg(target_arch = "x86")]
 use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
 #[cfg(target_arch = "x86_64")]
 use core::arch::x86_64::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
 
-use crate::support::{Flags, get_or_init_flags_cache};
+use crate::support::feature_detect::{Flags, get_or_init_flags_cache, unique_masks};
 
 /// CPU features that get cached (doesn't correlate to anything on the CPU).
 pub mod cpu_flags {
-    use crate::support::unique_masks;
+    use super::unique_masks;
 
     unique_masks! {
         u32,
diff --git a/libm/src/math/arch/x86/fma.rs b/libm/src/math/arch/x86/fma.rs
index eb43f4696..43ac18779 100644
--- a/libm/src/math/arch/x86/fma.rs
+++ b/libm/src/math/arch/x86/fma.rs
@@ -4,7 +4,8 @@ use core::arch::asm;
 
 use super::super::super::generic;
 use super::detect::{cpu_flags, get_cpu_features};
-use crate::support::{Round, select_once};
+use crate::support::Round;
+use crate::support::feature_detect::select_once;
 
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_once! {
diff --git a/libm/src/math/support/feature_detect.rs b/libm/src/math/support/feature_detect.rs
index cb669b073..9ebd434a5 100644
--- a/libm/src/math/support/feature_detect.rs
+++ b/libm/src/math/support/feature_detect.rs
@@ -1,5 +1,9 @@
 //! Helpers for runtime target feature detection that are shared across architectures.
 
+// `AtomicU32` is preferred for a consistent size across targets.
+#[cfg(all(target_has_atomic = "ptr", not(target_has_atomic = "32")))]
+compile_error!("currently all targets that support `AtomicPtr` also support `AtomicU32`");
+
 use core::sync::atomic::{AtomicU32, Ordering};
 
 /// Given a list of identifiers, assign each one a unique sequential single-bit mask.
@@ -72,6 +76,7 @@ macro_rules! select_once {
     }}
 }
 
+#[allow(unused_imports)]
 pub(crate) use {select_once, unique_masks};
 
 use crate::support::cold_path;
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index 727b9a360..a4f596ab8 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -2,7 +2,9 @@
 pub mod macros;
 mod big;
 mod env;
-mod feature_detect;
+// Runtime feature detection requires atomics.
+#[cfg(target_has_atomic = "ptr")]
+pub(crate) mod feature_detect;
 mod float_traits;
 pub mod hex_float;
 mod int_traits;
@@ -11,8 +13,6 @@ mod int_traits;
 pub use big::{i256, u256};
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
-pub(crate) use feature_detect::{Flags, get_or_init_flags_cache, select_once, unique_masks};
-#[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]

From cf0094106471e100f79000dba1926705f5f7f392 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 6 May 2025 21:59:33 +0000
Subject: [PATCH 020/133] chore: release

---
 compiler-builtins/CHANGELOG.md | 6 ++++++
 compiler-builtins/Cargo.toml   | 2 +-
 libm/CHANGELOG.md              | 6 ++++++
 libm/Cargo.toml                | 2 +-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md
index f152c2c2c..f0af37ba0 100644
--- a/compiler-builtins/CHANGELOG.md
+++ b/compiler-builtins/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06
+
+### Other
+
+- Require `target_has_atomic = "ptr"` for runtime feature detection
+
 ## [0.1.157](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.156...compiler_builtins-v0.1.157) - 2025-05-03
 
 ### Other
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index d9eebcfc8..81f708c48 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
-version = "0.1.157"
+version = "0.1.158"
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
diff --git a/libm/CHANGELOG.md b/libm/CHANGELOG.md
index a0217af09..33fec06aa 100644
--- a/libm/CHANGELOG.md
+++ b/libm/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to
 
 ## [Unreleased]
 
+## [0.2.15](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.14...libm-v0.2.15) - 2025-05-06
+
+### Other
+
+- Require `target_has_atomic = "ptr"` for runtime feature detection
+
 ## [0.2.14](https://github.com/rust-lang/compiler-builtins/compare/libm-v0.2.13...libm-v0.2.14) - 2025-05-03
 
 ### Other
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index 76c9a73bc..b6fb5efcf 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -8,7 +8,7 @@ license = "MIT"
 name = "libm"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
-version = "0.2.14"
+version = "0.2.15"
 edition = "2021"
 rust-version = "1.63"
 

From a4c748f72a1dce652cc3e41c3a8425731bd1519a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 6 May 2025 23:00:46 +0000
Subject: [PATCH 021/133] release-plz: Include the libm changelog in
 compiler-builtins

---
 .release-plz.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.release-plz.toml b/.release-plz.toml
index 95e91a46c..8023ade9b 100644
--- a/.release-plz.toml
+++ b/.release-plz.toml
@@ -7,6 +7,7 @@ publish_allow_dirty = true
 [[package]]
 name = "compiler_builtins"
 semver_check = false
+changelog_include = ["libm"] # libm is included as part of builtins
 
 [[package]]
 name = "libm"

From ab01e290b8fbaf334e934d75e27516829e000b3a Mon Sep 17 00:00:00 2001
From: nora <48135649+Noratrieb@users.noreply.github.com>
Date: Mon, 12 May 2025 16:15:24 +0200
Subject: [PATCH 022/133] Remove cfg(bootstrap)

For the bootstrap bump
---
 compiler-builtins/src/macros.rs | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs
index dbf715534..22e0dd27f 100644
--- a/compiler-builtins/src/macros.rs
+++ b/compiler-builtins/src/macros.rs
@@ -433,18 +433,6 @@ macro_rules! intrinsics {
     ) => (
         // `#[naked]` definitions are referenced by other places, so we can't use `cfg` like the others
         pub mod $name {
-            // FIXME: when bootstrap supports `#[unsafe(naked)]` this duplication can be removed
-            #[cfg(bootstrap)]
-            #[naked]
-            #[allow(unused_unsafe)]
-            $(#[$($attr)*])*
-            #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
-            #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
-            pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
-                unsafe { $($body)* }
-            }
-
-            #[cfg(not(bootstrap))]
             #[unsafe(naked)]
             $(#[$($attr)*])*
             #[cfg_attr(not(feature = "mangled-names"), no_mangle)]

From 87a6afb37fad14cee50c498d4dcd6c5a09930750 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 12 May 2025 14:26:59 +0000
Subject: [PATCH 023/133] chore(compiler_builtins): release v0.1.159

---
 compiler-builtins/CHANGELOG.md | 6 ++++++
 compiler-builtins/Cargo.toml   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md
index f0af37ba0..a7c01c463 100644
--- a/compiler-builtins/CHANGELOG.md
+++ b/compiler-builtins/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12
+
+### Other
+
+- Remove cfg(bootstrap)
+
 ## [0.1.158](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.157...compiler_builtins-v0.1.158) - 2025-05-06
 
 ### Other
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 81f708c48..d65a22152 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
-version = "0.1.158"
+version = "0.1.159"
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"

From f2918cd0f4d23a6ff038a7a9a5ea2695598aeaaa Mon Sep 17 00:00:00 2001
From: Tobias Decking <Tobias.Decking@gmail.com>
Date: Thu, 8 May 2025 15:00:59 +0200
Subject: [PATCH 024/133] Fix `i256::MAX`

---
 compiler-builtins/src/int/big.rs | 2 +-
 libm/src/math/support/big.rs     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-builtins/src/int/big.rs b/compiler-builtins/src/int/big.rs
index 61f1349d9..1402efb8e 100644
--- a/compiler-builtins/src/int/big.rs
+++ b/compiler-builtins/src/int/big.rs
@@ -65,7 +65,7 @@ impl MinInt for i256 {
     const ZERO: Self = Self([0u64; 4]);
     const ONE: Self = Self([1, 0, 0, 0]);
     const MIN: Self = Self([0, 0, 0, 1 << 63]);
-    const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
+    const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]);
 }
 
 macro_rules! impl_common {
diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs
index f24c063cd..8a52d86cc 100644
--- a/libm/src/math/support/big.rs
+++ b/libm/src/math/support/big.rs
@@ -83,7 +83,7 @@ impl MinInt for i256 {
     };
     const MAX: Self = Self {
         lo: u128::MAX,
-        hi: u128::MAX << 1,
+        hi: u128::MAX >> 1,
     };
 }
 

From 233434412fe7eced8f1ddbfeddabef1d55e493bd Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 18 May 2025 10:14:22 +0200
Subject: [PATCH 025/133] fix an if statement that can be collapsed

---
 crates/libm-macros/src/lib.rs | 40 ++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index e8afe3aad..482da974c 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(let_chains)]
+
 mod enums;
 mod parse;
 mod shared;
@@ -266,27 +268,27 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>
         }
     }
 
-    if let Some(map) = &input.fn_extra {
-        if !map.keys().any(|key| key == "_") {
-            // No default provided; make sure every expected function is covered
-            let mut fns_not_covered = Vec::new();
-            for func in &fn_list {
-                if !map.keys().any(|key| key == func.name) {
-                    // `name` was not mentioned in the `match` statement
-                    fns_not_covered.push(func);
-                }
+    if let Some(map) = &input.fn_extra
+        && !map.keys().any(|key| key == "_")
+    {
+        // No default provided; make sure every expected function is covered
+        let mut fns_not_covered = Vec::new();
+        for func in &fn_list {
+            if !map.keys().any(|key| key == func.name) {
+                // `name` was not mentioned in the `match` statement
+                fns_not_covered.push(func);
             }
+        }
 
-            if !fns_not_covered.is_empty() {
-                let e = syn::Error::new(
-                    input.fn_extra_span.unwrap(),
-                    format!(
-                        "`fn_extra`: no default `_` pattern specified and the following \
-                         patterns are not covered: {fns_not_covered:#?}"
-                    ),
-                );
-                return Err(e);
-            }
+        if !fns_not_covered.is_empty() {
+            let e = syn::Error::new(
+                input.fn_extra_span.unwrap(),
+                format!(
+                    "`fn_extra`: no default `_` pattern specified and the following \
+                     patterns are not covered: {fns_not_covered:#?}"
+                ),
+            );
+            return Err(e);
         }
     };
 

From da5f72d8f3d550648f53f7a5d8ec4ac9d886e01c Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Wed, 21 May 2025 18:11:11 +0100
Subject: [PATCH 026/133] Enable `__powitf2` on MSVC

---
 builtins-test/tests/float_pow.rs   | 2 --
 compiler-builtins/src/float/pow.rs | 2 --
 2 files changed, 4 deletions(-)

diff --git a/builtins-test/tests/float_pow.rs b/builtins-test/tests/float_pow.rs
index 8209543e6..0e8ae88e8 100644
--- a/builtins-test/tests/float_pow.rs
+++ b/builtins-test/tests/float_pow.rs
@@ -58,8 +58,6 @@ pow! {
 }
 
 #[cfg(f128_enabled)]
-// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
-#[cfg(not(target_env = "msvc"))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 pow! {
     f128, 1e-36, __powitf2, not(feature = "no-sys-f128");
diff --git a/compiler-builtins/src/float/pow.rs b/compiler-builtins/src/float/pow.rs
index 45a4ad904..6997a9c21 100644
--- a/compiler-builtins/src/float/pow.rs
+++ b/compiler-builtins/src/float/pow.rs
@@ -32,8 +32,6 @@ intrinsics! {
 
     #[ppc_alias = __powikf2]
     #[cfg(f128_enabled)]
-    // FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly.
-    #[cfg(not(target_env = "msvc"))]
     pub extern "C" fn __powitf2(a: f128, b: i32) -> f128 {
         pow(a, b)
     }

From 3f0959fa9967030775bc7f47eff63a8174f03acc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1lyi=20L=C5=91rinc?= <palyilorinc33@gmail.com>
Date: Sat, 10 May 2025 08:36:28 +0000
Subject: [PATCH 027/133] fixed typo in readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3130ff7b7..177bce624 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ This repository contains two main crates:
 * `compiler-builtins`: symbols that the compiler expects to be available at
   link time
 * `libm`: a Rust implementation of C math libraries, used to provide
-  implementations in `ocre`.
+  implementations in `core`.
 
 More details are at [compiler-builtins/README.md](compiler-builtins/README.md)
 and [libm/README.md](libm/README.md).

From 157a0b7df5a612173f9a8139e2066725bf049bc8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 22 Apr 2025 19:35:21 -0400
Subject: [PATCH 028/133] libm: Clean up unused files

These were deleted during refactoring in 0a2dc5d9 ("Combine the source
files for more generic implementations") but got added back by accident
in 54bac411 ("refactor: Move the libm crate to a subdirectory"). Remove
them again here.
---
 libm/src/math/copysignf.rs    |  8 -------
 libm/src/math/copysignf128.rs |  8 -------
 libm/src/math/copysignf16.rs  |  8 -------
 libm/src/math/fabsf.rs        | 39 -----------------------------------
 libm/src/math/fabsf128.rs     | 31 ----------------------------
 libm/src/math/fabsf16.rs      | 31 ----------------------------
 libm/src/math/fdimf.rs        | 12 -----------
 libm/src/math/fdimf128.rs     | 12 -----------
 libm/src/math/fdimf16.rs      | 12 -----------
 libm/src/math/floorf.rs       | 13 ------------
 libm/src/math/floorf128.rs    |  7 -------
 libm/src/math/floorf16.rs     |  7 -------
 libm/src/math/fmodf.rs        |  5 -----
 libm/src/math/fmodf128.rs     |  5 -----
 libm/src/math/fmodf16.rs      |  5 -----
 libm/src/math/ldexpf.rs       |  4 ----
 libm/src/math/ldexpf128.rs    |  4 ----
 libm/src/math/ldexpf16.rs     |  4 ----
 libm/src/math/roundf.rs       |  5 -----
 libm/src/math/roundf128.rs    |  5 -----
 libm/src/math/roundf16.rs     |  5 -----
 libm/src/math/scalbnf.rs      |  4 ----
 libm/src/math/scalbnf128.rs   |  4 ----
 libm/src/math/scalbnf16.rs    |  4 ----
 libm/src/math/sqrtf.rs        | 15 --------------
 libm/src/math/sqrtf128.rs     |  5 -----
 libm/src/math/sqrtf16.rs      | 11 ----------
 libm/src/math/truncf.rs       | 23 ---------------------
 libm/src/math/truncf128.rs    |  7 -------
 libm/src/math/truncf16.rs     |  7 -------
 30 files changed, 310 deletions(-)
 delete mode 100644 libm/src/math/copysignf.rs
 delete mode 100644 libm/src/math/copysignf128.rs
 delete mode 100644 libm/src/math/copysignf16.rs
 delete mode 100644 libm/src/math/fabsf.rs
 delete mode 100644 libm/src/math/fabsf128.rs
 delete mode 100644 libm/src/math/fabsf16.rs
 delete mode 100644 libm/src/math/fdimf.rs
 delete mode 100644 libm/src/math/fdimf128.rs
 delete mode 100644 libm/src/math/fdimf16.rs
 delete mode 100644 libm/src/math/floorf.rs
 delete mode 100644 libm/src/math/floorf128.rs
 delete mode 100644 libm/src/math/floorf16.rs
 delete mode 100644 libm/src/math/fmodf.rs
 delete mode 100644 libm/src/math/fmodf128.rs
 delete mode 100644 libm/src/math/fmodf16.rs
 delete mode 100644 libm/src/math/ldexpf.rs
 delete mode 100644 libm/src/math/ldexpf128.rs
 delete mode 100644 libm/src/math/ldexpf16.rs
 delete mode 100644 libm/src/math/roundf.rs
 delete mode 100644 libm/src/math/roundf128.rs
 delete mode 100644 libm/src/math/roundf16.rs
 delete mode 100644 libm/src/math/scalbnf.rs
 delete mode 100644 libm/src/math/scalbnf128.rs
 delete mode 100644 libm/src/math/scalbnf16.rs
 delete mode 100644 libm/src/math/sqrtf.rs
 delete mode 100644 libm/src/math/sqrtf128.rs
 delete mode 100644 libm/src/math/sqrtf16.rs
 delete mode 100644 libm/src/math/truncf.rs
 delete mode 100644 libm/src/math/truncf128.rs
 delete mode 100644 libm/src/math/truncf16.rs

diff --git a/libm/src/math/copysignf.rs b/libm/src/math/copysignf.rs
deleted file mode 100644
index 8b9bed4c0..000000000
--- a/libm/src/math/copysignf.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f32)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf(x: f32, y: f32) -> f32 {
-    super::generic::copysign(x, y)
-}
diff --git a/libm/src/math/copysignf128.rs b/libm/src/math/copysignf128.rs
deleted file mode 100644
index 7bd81d42b..000000000
--- a/libm/src/math/copysignf128.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f128)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf128(x: f128, y: f128) -> f128 {
-    super::generic::copysign(x, y)
-}
diff --git a/libm/src/math/copysignf16.rs b/libm/src/math/copysignf16.rs
deleted file mode 100644
index 820658686..000000000
--- a/libm/src/math/copysignf16.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// Sign of Y, magnitude of X (f16)
-///
-/// Constructs a number with the magnitude (absolute value) of its
-/// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn copysignf16(x: f16, y: f16) -> f16 {
-    super::generic::copysign(x, y)
-}
diff --git a/libm/src/math/fabsf.rs b/libm/src/math/fabsf.rs
deleted file mode 100644
index e5820a26c..000000000
--- a/libm/src/math/fabsf.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-/// Absolute value (magnitude) (f32)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf(x: f32) -> f32 {
-    select_implementation! {
-        name: fabsf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    super::generic::fabs(x)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf(-1.0), 1.0);
-        assert_eq!(fabsf(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf(f32::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf(f), 0.0);
-        }
-        for f in [f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf(f), f32::INFINITY);
-        }
-    }
-}
diff --git a/libm/src/math/fabsf128.rs b/libm/src/math/fabsf128.rs
deleted file mode 100644
index 46429ca49..000000000
--- a/libm/src/math/fabsf128.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-/// Absolute value (magnitude) (f128)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf128(x: f128) -> f128 {
-    super::generic::fabs(x)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf128(-1.0), 1.0);
-        assert_eq!(fabsf128(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf128(f128::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf128(f), 0.0);
-        }
-        for f in [f128::INFINITY, f128::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf128(f), f128::INFINITY);
-        }
-    }
-}
diff --git a/libm/src/math/fabsf16.rs b/libm/src/math/fabsf16.rs
deleted file mode 100644
index eee42ac6a..000000000
--- a/libm/src/math/fabsf16.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-/// Absolute value (magnitude) (f16)
-///
-/// Calculates the absolute value (magnitude) of the argument `x`,
-/// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fabsf16(x: f16) -> f16 {
-    super::generic::fabs(x)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn sanity_check() {
-        assert_eq!(fabsf16(-1.0), 1.0);
-        assert_eq!(fabsf16(2.8), 2.8);
-    }
-
-    /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs
-    #[test]
-    fn spec_tests() {
-        assert!(fabsf16(f16::NAN).is_nan());
-        for f in [0.0, -0.0].iter().copied() {
-            assert_eq!(fabsf16(f), 0.0);
-        }
-        for f in [f16::INFINITY, f16::NEG_INFINITY].iter().copied() {
-            assert_eq!(fabsf16(f), f16::INFINITY);
-        }
-    }
-}
diff --git a/libm/src/math/fdimf.rs b/libm/src/math/fdimf.rs
deleted file mode 100644
index 367ef517c..000000000
--- a/libm/src/math/fdimf.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f32)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf(x: f32, y: f32) -> f32 {
-    super::generic::fdim(x, y)
-}
diff --git a/libm/src/math/fdimf128.rs b/libm/src/math/fdimf128.rs
deleted file mode 100644
index 6f3d1d0ff..000000000
--- a/libm/src/math/fdimf128.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f128)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf128(x: f128, y: f128) -> f128 {
-    super::generic::fdim(x, y)
-}
diff --git a/libm/src/math/fdimf16.rs b/libm/src/math/fdimf16.rs
deleted file mode 100644
index 37bd68858..000000000
--- a/libm/src/math/fdimf16.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-/// Positive difference (f16)
-///
-/// Determines the positive difference between arguments, returning:
-/// * x - y if x > y, or
-/// * +0    if x <= y, or
-/// * NAN   if either argument is NAN.
-///
-/// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fdimf16(x: f16, y: f16) -> f16 {
-    super::generic::fdim(x, y)
-}
diff --git a/libm/src/math/floorf.rs b/libm/src/math/floorf.rs
deleted file mode 100644
index 16957b7f3..000000000
--- a/libm/src/math/floorf.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-/// Floor (f32)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf(x: f32) -> f32 {
-    select_implementation! {
-        name: floorf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    return super::generic::floor(x);
-}
diff --git a/libm/src/math/floorf128.rs b/libm/src/math/floorf128.rs
deleted file mode 100644
index 9a9fe4151..000000000
--- a/libm/src/math/floorf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Floor (f128)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf128(x: f128) -> f128 {
-    return super::generic::floor(x);
-}
diff --git a/libm/src/math/floorf16.rs b/libm/src/math/floorf16.rs
deleted file mode 100644
index f9b868e04..000000000
--- a/libm/src/math/floorf16.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Floor (f16)
-///
-/// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn floorf16(x: f16) -> f16 {
-    return super::generic::floor(x);
-}
diff --git a/libm/src/math/fmodf.rs b/libm/src/math/fmodf.rs
deleted file mode 100644
index 4e95696e2..000000000
--- a/libm/src/math/fmodf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf(x: f32, y: f32) -> f32 {
-    super::generic::fmod(x, y)
-}
diff --git a/libm/src/math/fmodf128.rs b/libm/src/math/fmodf128.rs
deleted file mode 100644
index ff0e0493e..000000000
--- a/libm/src/math/fmodf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf128(x: f128, y: f128) -> f128 {
-    super::generic::fmod(x, y)
-}
diff --git a/libm/src/math/fmodf16.rs b/libm/src/math/fmodf16.rs
deleted file mode 100644
index 11972a7de..000000000
--- a/libm/src/math/fmodf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmodf16(x: f16, y: f16) -> f16 {
-    super::generic::fmod(x, y)
-}
diff --git a/libm/src/math/ldexpf.rs b/libm/src/math/ldexpf.rs
deleted file mode 100644
index 95b27fc49..000000000
--- a/libm/src/math/ldexpf.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf(x: f32, n: i32) -> f32 {
-    super::scalbnf(x, n)
-}
diff --git a/libm/src/math/ldexpf128.rs b/libm/src/math/ldexpf128.rs
deleted file mode 100644
index b35277d15..000000000
--- a/libm/src/math/ldexpf128.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf128(x: f128, n: i32) -> f128 {
-    super::scalbnf128(x, n)
-}
diff --git a/libm/src/math/ldexpf16.rs b/libm/src/math/ldexpf16.rs
deleted file mode 100644
index 8de6cffd6..000000000
--- a/libm/src/math/ldexpf16.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn ldexpf16(x: f16, n: i32) -> f16 {
-    super::scalbnf16(x, n)
-}
diff --git a/libm/src/math/roundf.rs b/libm/src/math/roundf.rs
deleted file mode 100644
index b5d7c9d69..000000000
--- a/libm/src/math/roundf.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf(x: f32) -> f32 {
-    super::generic::round(x)
-}
diff --git a/libm/src/math/roundf128.rs b/libm/src/math/roundf128.rs
deleted file mode 100644
index fc3164929..000000000
--- a/libm/src/math/roundf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf128(x: f128) -> f128 {
-    super::generic::round(x)
-}
diff --git a/libm/src/math/roundf16.rs b/libm/src/math/roundf16.rs
deleted file mode 100644
index 8b356eaab..000000000
--- a/libm/src/math/roundf16.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn roundf16(x: f16) -> f16 {
-    super::generic::round(x)
-}
diff --git a/libm/src/math/scalbnf.rs b/libm/src/math/scalbnf.rs
deleted file mode 100644
index 57e7ba76f..000000000
--- a/libm/src/math/scalbnf.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf(x: f32, n: i32) -> f32 {
-    super::generic::scalbn(x, n)
-}
diff --git a/libm/src/math/scalbnf128.rs b/libm/src/math/scalbnf128.rs
deleted file mode 100644
index c1d2b4855..000000000
--- a/libm/src/math/scalbnf128.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf128(x: f128, n: i32) -> f128 {
-    super::generic::scalbn(x, n)
-}
diff --git a/libm/src/math/scalbnf16.rs b/libm/src/math/scalbnf16.rs
deleted file mode 100644
index 2209e1a17..000000000
--- a/libm/src/math/scalbnf16.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn scalbnf16(x: f16, n: i32) -> f16 {
-    super::generic::scalbn(x, n)
-}
diff --git a/libm/src/math/sqrtf.rs b/libm/src/math/sqrtf.rs
deleted file mode 100644
index c28a705e3..000000000
--- a/libm/src/math/sqrtf.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-/// The square root of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf(x: f32) -> f32 {
-    select_implementation! {
-        name: sqrtf,
-        use_arch: any(
-            all(target_arch = "aarch64", target_feature = "neon"),
-            all(target_arch = "wasm32", intrinsics_enabled),
-            target_feature = "sse2"
-        ),
-        args: x,
-    }
-
-    super::generic::sqrt(x)
-}
diff --git a/libm/src/math/sqrtf128.rs b/libm/src/math/sqrtf128.rs
deleted file mode 100644
index eaef6ae0c..000000000
--- a/libm/src/math/sqrtf128.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-/// The square root of `x` (f128).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf128(x: f128) -> f128 {
-    return super::generic::sqrt(x);
-}
diff --git a/libm/src/math/sqrtf16.rs b/libm/src/math/sqrtf16.rs
deleted file mode 100644
index 7bedb7f8b..000000000
--- a/libm/src/math/sqrtf16.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-/// The square root of `x` (f16).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn sqrtf16(x: f16) -> f16 {
-    select_implementation! {
-        name: sqrtf16,
-        use_arch: all(target_arch = "aarch64", target_feature = "fp16"),
-        args: x,
-    }
-
-    return super::generic::sqrt(x);
-}
diff --git a/libm/src/math/truncf.rs b/libm/src/math/truncf.rs
deleted file mode 100644
index 14533a267..000000000
--- a/libm/src/math/truncf.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f32).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf(x: f32) -> f32 {
-    select_implementation! {
-        name: truncf,
-        use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        args: x,
-    }
-
-    super::generic::trunc(x)
-}
-
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn sanity_check() {
-        assert_eq!(super::truncf(1.1), 1.0);
-    }
-}
diff --git a/libm/src/math/truncf128.rs b/libm/src/math/truncf128.rs
deleted file mode 100644
index 9dccc0d0e..000000000
--- a/libm/src/math/truncf128.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f128).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf128(x: f128) -> f128 {
-    super::generic::trunc(x)
-}
diff --git a/libm/src/math/truncf16.rs b/libm/src/math/truncf16.rs
deleted file mode 100644
index d7c3d225c..000000000
--- a/libm/src/math/truncf16.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-/// Rounds the number toward 0 to the closest integral value (f16).
-///
-/// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn truncf16(x: f16) -> f16 {
-    super::generic::trunc(x)
-}

From 7365ea4b0645879ab6520c77bebf01f1cd6ead35 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 23 May 2025 17:26:39 +0000
Subject: [PATCH 029/133] Update `CmpResult` to use a pointer-sized return type

As seen at [1], LLVM uses `long long` on LLP64 (to get a 64-bit integer
matching pointer size) and `long` on everything else, with exceptions
for AArch64 and AVR. Our current logic always uses an `i32`. This
happens to work because LLVM uses 32-bit instructions to check the
output on x86-64, but GCC checks the full 64-bit register so garbage
in the upper half leads to incorrect results.

Update our return type to be `isize`, with exceptions for AArch64 and
AVR.

Fixes: https://github.com/rust-lang/compiler-builtins/issues/919

[1]: https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27
---
 builtins-test/benches/float_cmp.rs | 43 +++++++++++++++++++-----------
 builtins-test/src/bench.rs         |  4 +--
 compiler-builtins/src/float/cmp.rs | 25 +++++++++++------
 libm/src/math/support/mod.rs       |  2 ++
 4 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs
index 42d665239..87a89efb5 100644
--- a/builtins-test/benches/float_cmp.rs
+++ b/builtins-test/benches/float_cmp.rs
@@ -1,12 +1,23 @@
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::float_bench;
-use compiler_builtins::float::cmp;
+use compiler_builtins::float::cmp::{self, CmpResult};
 use criterion::{Criterion, criterion_main};
 
 /// `gt` symbols are allowed to return differing results, they just get compared
 /// to 0.
-fn gt_res_eq(a: i32, b: i32) -> bool {
+fn gt_res_eq(mut a: CmpResult, mut b: CmpResult) -> bool {
+    // FIXME: Our CmpResult used to be `i32`, but GCC/LLVM expect `isize`. on 64-bit platforms,
+    // this means the top half of the word may be garbage if built with an old version of
+    // `compiler-builtins`, so add a hack around this.
+    //
+    // This can be removed once a version of `compiler-builtins` with the return type fix makes
+    // it upstream.
+    if size_of::<CmpResult>() == 8 {
+        a = a as i32 as CmpResult;
+        b = b as i32 as CmpResult;
+    }
+
     let a_lt_0 = a <= 0;
     let b_lt_0 = b <= 0;
     (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0)
@@ -14,14 +25,14 @@ fn gt_res_eq(a: i32, b: i32) -> bool {
 
 float_bench! {
     name: cmp_f32_gt,
-    sig: (a: f32, b: f32) -> i32,
+    sig: (a: f32, b: f32) -> CmpResult,
     crate_fn: cmp::__gtsf2,
     sys_fn: __gtsf2,
     sys_available: all(),
     output_eq: gt_res_eq,
     asm: [
         #[cfg(target_arch = "x86_64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "xor     {ret:e}, {ret:e}",
                 "ucomiss {a}, {b}",
@@ -36,7 +47,7 @@ float_bench! {
         };
 
         #[cfg(target_arch = "aarch64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "fcmp    {a:s}, {b:s}",
                 "cset    {ret:w}, gt",
@@ -53,13 +64,13 @@ float_bench! {
 
 float_bench! {
     name: cmp_f32_unord,
-    sig: (a: f32, b: f32) -> i32,
+    sig: (a: f32, b: f32) -> CmpResult,
     crate_fn: cmp::__unordsf2,
     sys_fn: __unordsf2,
     sys_available: all(),
     asm: [
         #[cfg(target_arch = "x86_64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "xor     {ret:e}, {ret:e}",
                 "ucomiss {a}, {b}",
@@ -74,7 +85,7 @@ float_bench! {
         };
 
         #[cfg(target_arch = "aarch64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "fcmp    {a:s}, {b:s}",
                 "cset    {ret:w}, vs",
@@ -91,14 +102,14 @@ float_bench! {
 
 float_bench! {
     name: cmp_f64_gt,
-    sig: (a: f64, b: f64) -> i32,
+    sig: (a: f64, b: f64) -> CmpResult,
     crate_fn: cmp::__gtdf2,
     sys_fn: __gtdf2,
     sys_available: all(),
     output_eq: gt_res_eq,
     asm: [
         #[cfg(target_arch = "x86_64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "xor     {ret:e}, {ret:e}",
                 "ucomisd {a}, {b}",
@@ -113,7 +124,7 @@ float_bench! {
         };
 
         #[cfg(target_arch = "aarch64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "fcmp    {a:d}, {b:d}",
                 "cset {ret:w}, gt",
@@ -130,13 +141,13 @@ float_bench! {
 
 float_bench! {
     name: cmp_f64_unord,
-    sig: (a: f64, b: f64) -> i32,
+    sig: (a: f64, b: f64) -> CmpResult,
     crate_fn: cmp::__unorddf2,
     sys_fn: __unorddf2,
     sys_available: all(),
     asm: [
         #[cfg(target_arch = "x86_64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "xor     {ret:e}, {ret:e}",
                 "ucomisd {a}, {b}",
@@ -151,7 +162,7 @@ float_bench! {
         };
 
         #[cfg(target_arch = "aarch64")] {
-            let ret: i32;
+            let ret: CmpResult;
             asm!(
                 "fcmp    {a:d}, {b:d}",
                 "cset    {ret:w}, vs",
@@ -168,7 +179,7 @@ float_bench! {
 
 float_bench! {
     name: cmp_f128_gt,
-    sig: (a: f128, b: f128) -> i32,
+    sig: (a: f128, b: f128) -> CmpResult,
     crate_fn: cmp::__gttf2,
     crate_fn_ppc: cmp::__gtkf2,
     sys_fn: __gttf2,
@@ -180,7 +191,7 @@ float_bench! {
 
 float_bench! {
     name: cmp_f128_unord,
-    sig: (a: f128, b: f128) -> i32,
+    sig: (a: f128, b: f128) -> CmpResult,
     crate_fn: cmp::__unordtf2,
     crate_fn_ppc: cmp::__unordkf2,
     sys_fn: __unordtf2,
diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index 2348f6bc9..098718567 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -358,8 +358,8 @@ impl_testio!(float f16);
 impl_testio!(float f32, f64);
 #[cfg(f128_enabled)]
 impl_testio!(float f128);
-impl_testio!(int i16, i32, i64, i128);
-impl_testio!(int u16, u32, u64, u128);
+impl_testio!(int i8, i16, i32, i64, i128, isize);
+impl_testio!(int u8, u16, u32, u64, u128, usize);
 impl_testio!((float, int)(f32, i32));
 impl_testio!((float, int)(f64, i32));
 #[cfg(f128_enabled)]
diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs
index 296952821..f1e54dc1c 100644
--- a/compiler-builtins/src/float/cmp.rs
+++ b/compiler-builtins/src/float/cmp.rs
@@ -2,14 +2,23 @@
 
 use crate::float::Float;
 use crate::int::MinInt;
-
-// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L22
-#[cfg(target_arch = "avr")]
-pub type CmpResult = i8;
-
-// https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/compiler-rt/lib/builtins/fp_compare_impl.inc#L25
-#[cfg(not(target_arch = "avr"))]
-pub type CmpResult = i32;
+use crate::support::cfg_if;
+
+// Taken from LLVM config:
+// https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27
+cfg_if! {
+    if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] {
+        // Aarch64 uses `int` rather than a pointer-sized value.
+        pub type CmpResult = i32;
+    } else if #[cfg(target_arch = "avr")] {
+        // AVR uses a single byte.
+        pub type CmpResult = i8;
+    } else {
+        // In compiler-rt, LLP64 ABIs use `long long` and everything else uses `long`. In effect,
+        // this means the return value is always pointer-sized.
+        pub type CmpResult = isize;
+    }
+}
 
 #[derive(Clone, Copy)]
 enum Result {
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index a4f596ab8..2771cfd32 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -11,6 +11,8 @@ mod int_traits;
 
 #[allow(unused_imports)]
 pub use big::{i256, u256};
+#[allow(unused_imports)]
+pub(crate) use cfg_if;
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};

From 347adad2a3907e7e5bee51b7582fbc5a54a8e51b Mon Sep 17 00:00:00 2001
From: Dario Damiani <154735680+D-Dario0@users.noreply.github.com>
Date: Wed, 28 May 2025 20:48:05 +0200
Subject: [PATCH 030/133] Typo in README.md

Link to Apache License changed from htps:// to https://
---
 libm/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libm/README.md b/libm/README.md
index 349e892df..77608db3d 100644
--- a/libm/README.md
+++ b/libm/README.md
@@ -34,7 +34,7 @@ Usage is under the MIT license, available at
 ### Contribution
 
 Contributions are licensed under both the MIT license and the Apache License,
-Version 2.0, available at <htps://www.apache.org/licenses/LICENSE-2.0>. Unless
+Version 2.0, available at <https://www.apache.org/licenses/LICENSE-2.0>. Unless
 you explicitly state otherwise, any contribution intentionally submitted for
 inclusion in the work by you, as defined in the Apache-2.0 license, shall be
 dual licensed as mentioned, without any additional terms or conditions.

From fc34c3edad294c2035c24968d43e3d2ce8fbc471 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 28 May 2025 21:08:41 +0000
Subject: [PATCH 031/133] aarch64: Add a note saying why we use `frintn` rather
 than `frintx`

---
 libm/src/math/arch/aarch64.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libm/src/math/arch/aarch64.rs b/libm/src/math/arch/aarch64.rs
index 020bb731c..8896804b5 100644
--- a/libm/src/math/arch/aarch64.rs
+++ b/libm/src/math/arch/aarch64.rs
@@ -30,6 +30,12 @@ pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
     x
 }
 
+// NB: `frintx` is technically the correct instruction for C's `rint`. However, in Rust (and LLVM
+// by default), `rint` is identical to `roundeven` (no fpenv interaction) so we use the
+// side-effect-free `frintn`.
+//
+// In general, C code that calls Rust's libm should assume that fpenv is ignored.
+
 pub fn rint(mut x: f64) -> f64 {
     // SAFETY: `frintn` is available with neon and has no side effects.
     //

From 0608b45a1d68f91481fc943072f01d08ceb3accb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Apr 2025 09:35:55 +0000
Subject: [PATCH 032/133] cleanup: Reuse `MinInt` and `Int` from `libm` in
 `compiler-builtins`

Since the two crates are now in the same repo, it is easier to share
code. Begin some deduplication with the integer traits.
---
 builtins-test/src/lib.rs                    |  78 +++++-
 compiler-builtins/src/float/add.rs          |  22 +-
 compiler-builtins/src/float/conv.rs         |  24 +-
 compiler-builtins/src/float/div.rs          |   2 +-
 compiler-builtins/src/float/mul.rs          |   2 +-
 compiler-builtins/src/float/traits.rs       |   4 +-
 compiler-builtins/src/int/addsub.rs         |   6 +-
 compiler-builtins/src/int/big.rs            |   4 +-
 compiler-builtins/src/int/leading_zeros.rs  |  64 +++--
 compiler-builtins/src/int/trailing_zeros.rs |  25 +-
 compiler-builtins/src/int/traits.rs         | 273 +-------------------
 libm/src/math/support/int_traits.rs         |   9 +
 12 files changed, 168 insertions(+), 345 deletions(-)

diff --git a/builtins-test/src/lib.rs b/builtins-test/src/lib.rs
index c596ac213..f1673133b 100644
--- a/builtins-test/src/lib.rs
+++ b/builtins-test/src/lib.rs
@@ -40,6 +40,75 @@ pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) {
     10_000
 };
 
+/// Additional constants that determine how the integer gets fuzzed.
+trait FuzzInt: MinInt {
+    /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
+    /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,
+    /// 111,112,119,120,125,126,127].
+    const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(Self::BITS);
+
+    /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
+    const FUZZ_NUM: usize = {
+        let log2 = Self::BITS.ilog2() as usize;
+        if log2 == 3 {
+            // case for u8
+            6
+        } else {
+            // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
+            // boundaries.
+            8 + (4 * (log2 - 4))
+        }
+    };
+}
+
+impl<I> FuzzInt for I where I: MinInt {}
+
+const fn make_fuzz_lengths(bits: u32) -> [u8; 20] {
+    let mut v = [0u8; 20];
+    v[0] = 0;
+    v[1] = 1;
+    v[2] = 2; // important for parity and the iX::MIN case when reversed
+    let mut i = 3;
+
+    // No need for any more until the byte boundary, because there should be no algorithms
+    // that are sensitive to anything not next to byte boundaries after 2. We also scale
+    // in powers of two, which is important to prevent u128 corner tests from getting too
+    // big.
+    let mut l = 8;
+    loop {
+        if l >= ((bits / 2) as u8) {
+            break;
+        }
+        // get both sides of the byte boundary
+        v[i] = l - 1;
+        i += 1;
+        v[i] = l;
+        i += 1;
+        l *= 2;
+    }
+
+    if bits != 8 {
+        // add the lower side of the middle boundary
+        v[i] = ((bits / 2) - 1) as u8;
+        i += 1;
+    }
+
+    // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
+    // boundary because of algorithms that split the high part up. We reverse the scaling
+    // as we go to Self::BITS.
+    let mid = i;
+    let mut j = 1;
+    loop {
+        v[i] = (bits as u8) - (v[mid - j]) - 1;
+        if j == mid {
+            break;
+        }
+        i += 1;
+        j += 1;
+    }
+    v
+}
+
 /// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as:
 /// 11110101010101011110111110011111
 /// 10110101010100001011101011001010
@@ -92,10 +161,9 @@ fn fuzz_step<I: Int>(rng: &mut Xoshiro128StarStar, x: &mut I) {
 macro_rules! edge_cases {
     ($I:ident, $case:ident, $inner:block) => {
         for i0 in 0..$I::FUZZ_NUM {
-            let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32);
+            let mask_lo = (!$I::Unsigned::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32);
             for i1 in i0..I::FUZZ_NUM {
-                let mask_hi =
-                    (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32);
+                let mask_hi = (!$I::Unsigned::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32);
                 let $case = I::from_unsigned(mask_lo & mask_hi);
                 $inner
             }
@@ -107,7 +175,7 @@ macro_rules! edge_cases {
 /// edge cases, followed by a more random fuzzer that runs `n` times.
 pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F)
 where
-    <I as MinInt>::UnsignedInt: Int,
+    <I as MinInt>::Unsigned: Int,
 {
     // edge case tester. Calls `f` 210 times for u128.
     // zero gets skipped by the loop
@@ -128,7 +196,7 @@ where
 /// The same as `fuzz`, except `f` has two inputs.
 pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F)
 where
-    <I as MinInt>::UnsignedInt: Int,
+    <I as MinInt>::Unsigned: Int,
 {
     // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`.
     edge_cases!(I, case, {
diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs
index 0426c9cc4..43e3ae931 100644
--- a/compiler-builtins/src/float/add.rs
+++ b/compiler-builtins/src/float/add.rs
@@ -1,5 +1,5 @@
 use crate::float::Float;
-use crate::int::{CastInto, Int, MinInt};
+use crate::int::{CastFrom, CastInto, Int, MinInt};
 
 /// Returns `a + b`
 fn add<F: Float>(a: F, b: F) -> F
@@ -12,7 +12,7 @@ where
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
 
-    let bits = F::BITS.cast();
+    let bits: F::Int = F::BITS.cast();
     let significand_bits = F::SIG_BITS;
     let max_exponent = F::EXP_SAT;
 
@@ -115,9 +115,10 @@ where
     let align = a_exponent.wrapping_sub(b_exponent).cast();
     if align != MinInt::ZERO {
         if align < bits {
-            let sticky =
-                F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
-            b_significand = (b_significand >> align.cast()) | sticky;
+            let sticky = F::Int::from_bool(
+                b_significand << u32::cast_from(bits.wrapping_sub(align)) != MinInt::ZERO,
+            );
+            b_significand = (b_significand >> u32::cast_from(align)) | sticky;
         } else {
             b_significand = one; // sticky; b is known to be non-zero.
         }
@@ -132,8 +133,8 @@ where
         // If partial cancellation occured, we need to left-shift the result
         // and adjust the exponent:
         if a_significand < implicit_bit << 3 {
-            let shift =
-                a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
+            let shift = a_significand.leading_zeros() as i32
+                - (implicit_bit << 3u32).leading_zeros() as i32;
             a_significand <<= shift;
             a_exponent -= shift;
         }
@@ -159,9 +160,10 @@ where
         // Result is denormal before rounding; the exponent is zero and we
         // need to shift the significand.
         let shift = (1 - a_exponent).cast();
-        let sticky =
-            F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
-        a_significand = (a_significand >> shift.cast()) | sticky;
+        let sticky = F::Int::from_bool(
+            (a_significand << u32::cast_from(bits.wrapping_sub(shift))) != MinInt::ZERO,
+        );
+        a_significand = (a_significand >> u32::cast_from(shift)) | sticky;
         a_exponent = 0;
     }
 
diff --git a/compiler-builtins/src/float/conv.rs b/compiler-builtins/src/float/conv.rs
index f5427a113..9d732f2cd 100644
--- a/compiler-builtins/src/float/conv.rs
+++ b/compiler-builtins/src/float/conv.rs
@@ -72,7 +72,7 @@ mod int_to_float {
         F: Float,
         I: Int,
         F::Int: CastFrom<I>,
-        Conv: Fn(I::UnsignedInt) -> F::Int,
+        Conv: Fn(I::Unsigned) -> F::Int,
     {
         let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
         F::from_bits(conv(i.unsigned_abs()) | sign_bit)
@@ -313,10 +313,10 @@ intrinsics! {
 fn float_to_unsigned_int<F, U>(f: F) -> U
 where
     F: Float,
-    U: Int<UnsignedInt = U>,
+    U: Int<Unsigned = U>,
     F::Int: CastInto<U>,
     F::Int: CastFrom<u32>,
-    F::Int: CastInto<U::UnsignedInt>,
+    F::Int: CastInto<U::Unsigned>,
     u32: CastFrom<F::Int>,
 {
     float_to_int_inner::<F, U, _, _>(f.to_bits(), |i: U| i, || U::MAX)
@@ -327,8 +327,8 @@ fn float_to_signed_int<F, I>(f: F) -> I
 where
     F: Float,
     I: Int + Neg<Output = I>,
-    I::UnsignedInt: Int,
-    F::Int: CastInto<I::UnsignedInt>,
+    I::Unsigned: Int,
+    F::Int: CastInto<I::Unsigned>,
     F::Int: CastFrom<u32>,
     u32: CastFrom<F::Int>,
 {
@@ -355,27 +355,27 @@ where
     I: Int,
     FnFoo: FnOnce(I) -> I,
     FnOob: FnOnce() -> I,
-    I::UnsignedInt: Int,
-    F::Int: CastInto<I::UnsignedInt>,
+    I::Unsigned: Int,
+    F::Int: CastInto<I::Unsigned>,
     F::Int: CastFrom<u32>,
     u32: CastFrom<F::Int>,
 {
     let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1;
-    let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1;
+    let foobar = F::EXP_BIAS + I::Unsigned::BITS - 1;
 
     if fbits < F::ONE.to_bits() {
         // < 0 gets rounded to 0
         I::ZERO
     } else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS {
         // >= 1, < integer max
-        let m_base = if I::UnsignedInt::BITS >= F::Int::BITS {
-            I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
+        let m_base = if I::Unsigned::BITS >= F::Int::BITS {
+            I::Unsigned::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
         } else {
-            I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
+            I::Unsigned::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
         };
 
         // Set the implicit 1-bit.
-        let m: I::UnsignedInt = (I::UnsignedInt::ONE << (I::BITS - 1)) | m_base;
+        let m: I::Unsigned = (I::Unsigned::ONE << (I::BITS - 1)) | m_base;
 
         // Shift based on the exponent and bias.
         let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS);
diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs
index 5df637c7e..3e4f0e20d 100644
--- a/compiler-builtins/src/float/div.rs
+++ b/compiler-builtins/src/float/div.rs
@@ -370,7 +370,7 @@ where
         let hi_corr: F::Int = corr_uq1 >> hw;
 
         // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1u32)
             .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
             // 1 to account for the highest bit of corr_UQ1 can be 1
             // 1 to account for possible carry
diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs
index 7f1f19d9b..c811f1406 100644
--- a/compiler-builtins/src/float/mul.rs
+++ b/compiler-builtins/src/float/mul.rs
@@ -143,7 +143,7 @@ where
         // a zero of the appropriate sign.  Mathematically there is no need to
         // handle this case separately, but we make it a special case to
         // simplify the shift logic.
-        let shift = one.wrapping_sub(product_exponent.cast()).cast();
+        let shift: u32 = one.wrapping_sub(product_exponent.cast()).cast();
         if shift >= bits {
             return F::from_bits(product_sign);
         }
diff --git a/compiler-builtins/src/float/traits.rs b/compiler-builtins/src/float/traits.rs
index 8ccaa7bcb..a30d20900 100644
--- a/compiler-builtins/src/float/traits.rs
+++ b/compiler-builtins/src/float/traits.rs
@@ -20,10 +20,10 @@ pub trait Float:
     + ops::Rem<Output = Self>
 {
     /// A uint of the same width as the float
-    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
+    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
+    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
 
     /// An int capable of containing the exponent bits plus a sign bit. This is signed.
     type ExpInt: Int;
diff --git a/compiler-builtins/src/int/addsub.rs b/compiler-builtins/src/int/addsub.rs
index 1f84e8eb1..b2b21fc2c 100644
--- a/compiler-builtins/src/int/addsub.rs
+++ b/compiler-builtins/src/int/addsub.rs
@@ -22,7 +22,7 @@ impl UAddSub for u128 {}
 
 trait AddSub: Int
 where
-    <Self as MinInt>::UnsignedInt: UAddSub,
+    <Self as MinInt>::Unsigned: UAddSub,
 {
     fn add(self, other: Self) -> Self {
         Self::from_unsigned(self.unsigned().uadd(other.unsigned()))
@@ -37,7 +37,7 @@ impl AddSub for i128 {}
 
 trait Addo: AddSub
 where
-    <Self as MinInt>::UnsignedInt: UAddSub,
+    <Self as MinInt>::Unsigned: UAddSub,
 {
     fn addo(self, other: Self) -> (Self, bool) {
         let sum = AddSub::add(self, other);
@@ -50,7 +50,7 @@ impl Addo for u128 {}
 
 trait Subo: AddSub
 where
-    <Self as MinInt>::UnsignedInt: UAddSub,
+    <Self as MinInt>::Unsigned: UAddSub,
 {
     fn subo(self, other: Self) -> (Self, bool) {
         let sum = AddSub::sub(self, other);
diff --git a/compiler-builtins/src/int/big.rs b/compiler-builtins/src/int/big.rs
index 1402efb8e..8e0600909 100644
--- a/compiler-builtins/src/int/big.rs
+++ b/compiler-builtins/src/int/big.rs
@@ -45,7 +45,7 @@ impl i256 {
 impl MinInt for u256 {
     type OtherSign = i256;
 
-    type UnsignedInt = u256;
+    type Unsigned = u256;
 
     const SIGNED: bool = false;
     const BITS: u32 = 256;
@@ -58,7 +58,7 @@ impl MinInt for u256 {
 impl MinInt for i256 {
     type OtherSign = u256;
 
-    type UnsignedInt = u256;
+    type Unsigned = u256;
 
     const SIGNED: bool = false;
     const BITS: u32 = 256;
diff --git a/compiler-builtins/src/int/leading_zeros.rs b/compiler-builtins/src/int/leading_zeros.rs
index 112f4d036..aa5cb3993 100644
--- a/compiler-builtins/src/int/leading_zeros.rs
+++ b/compiler-builtins/src/int/leading_zeros.rs
@@ -9,11 +9,14 @@ pub use implementation::{leading_zeros_default, leading_zeros_riscv};
 pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv};
 
 mod implementation {
-    use crate::int::{CastInto, Int};
+    use crate::int::{CastFrom, Int};
 
     /// Returns the number of leading binary zeros in `x`.
     #[allow(dead_code)]
-    pub fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
+    pub fn leading_zeros_default<I: Int>(x: I) -> usize
+    where
+        usize: CastFrom<I>,
+    {
         // The basic idea is to test if the higher bits of `x` are zero and bisect the number
         // of leading zeros. It is possible for all branches of the bisection to use the same
         // code path by conditionally shifting the higher parts down to let the next bisection
@@ -23,44 +26,48 @@ mod implementation {
         // because it simplifies the final bisection step.
         let mut x = x;
         // the number of potential leading zeros
-        let mut z = T::BITS as usize;
+        let mut z = I::BITS as usize;
         // a temporary
-        let mut t: T;
+        let mut t: I;
 
-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
+        const { assert!(I::BITS <= 64) };
+        if I::BITS >= 64 {
             t = x >> 32;
-            if t != T::ZERO {
+            if t != I::ZERO {
                 z -= 32;
                 x = t;
             }
         }
-        if T::BITS >= 32 {
+        if I::BITS >= 32 {
             t = x >> 16;
-            if t != T::ZERO {
+            if t != I::ZERO {
                 z -= 16;
                 x = t;
             }
         }
-        const { assert!(T::BITS >= 16) };
+        const { assert!(I::BITS >= 16) };
         t = x >> 8;
-        if t != T::ZERO {
+        if t != I::ZERO {
             z -= 8;
             x = t;
         }
         t = x >> 4;
-        if t != T::ZERO {
+        if t != I::ZERO {
             z -= 4;
             x = t;
         }
         t = x >> 2;
-        if t != T::ZERO {
+        if t != I::ZERO {
             z -= 2;
             x = t;
         }
         // the last two bisections are combined into one conditional
         t = x >> 1;
-        if t != T::ZERO { z - 2 } else { z - x.cast() }
+        if t != I::ZERO {
+            z - 2
+        } else {
+            z - usize::cast_from(x)
+        }
 
         // We could potentially save a few cycles by using the LUT trick from
         // "https://embeddedgurus.com/state-space/2014/09/
@@ -82,10 +89,13 @@ mod implementation {
 
     /// Returns the number of leading binary zeros in `x`.
     #[allow(dead_code)]
-    pub fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
+    pub fn leading_zeros_riscv<I: Int>(x: I) -> usize
+    where
+        usize: CastFrom<I>,
+    {
         let mut x = x;
         // the number of potential leading zeros
-        let mut z = T::BITS;
+        let mut z = I::BITS;
         // a temporary
         let mut t: u32;
 
@@ -97,11 +107,11 @@ mod implementation {
         // right). If we try to save an instruction by using `x < imm` for each bisection, we
         // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
         // but the immediate will never fit into 12 bits and never save an instruction.
-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
+        const { assert!(I::BITS <= 64) };
+        if I::BITS >= 64 {
             // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
             // `t` is set to 0.
-            t = ((x >= (T::ONE << 32)) as u32) << 5;
+            t = ((x >= (I::ONE << 32)) as u32) << 5;
             // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
             // next step to process.
             x >>= t;
@@ -109,27 +119,27 @@ mod implementation {
             // leading zeros
             z -= t;
         }
-        if T::BITS >= 32 {
-            t = ((x >= (T::ONE << 16)) as u32) << 4;
+        if I::BITS >= 32 {
+            t = ((x >= (I::ONE << 16)) as u32) << 4;
             x >>= t;
             z -= t;
         }
-        const { assert!(T::BITS >= 16) };
-        t = ((x >= (T::ONE << 8)) as u32) << 3;
+        const { assert!(I::BITS >= 16) };
+        t = ((x >= (I::ONE << 8)) as u32) << 3;
         x >>= t;
         z -= t;
-        t = ((x >= (T::ONE << 4)) as u32) << 2;
+        t = ((x >= (I::ONE << 4)) as u32) << 2;
         x >>= t;
         z -= t;
-        t = ((x >= (T::ONE << 2)) as u32) << 1;
+        t = ((x >= (I::ONE << 2)) as u32) << 1;
         x >>= t;
         z -= t;
-        t = (x >= (T::ONE << 1)) as u32;
+        t = (x >= (I::ONE << 1)) as u32;
         x >>= t;
         z -= t;
         // All bits except the LSB are guaranteed to be zero for this final bisection step.
         // If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
-        z as usize - x.cast()
+        z as usize - usize::cast_from(x)
     }
 }
 
diff --git a/compiler-builtins/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs
index c45d6b1cf..8f63c22c8 100644
--- a/compiler-builtins/src/int/trailing_zeros.rs
+++ b/compiler-builtins/src/int/trailing_zeros.rs
@@ -4,33 +4,38 @@ pub use implementation::trailing_zeros;
 pub(crate) use implementation::trailing_zeros;
 
 mod implementation {
-    use crate::int::{CastInto, Int};
+    use crate::int::{CastFrom, Int};
 
     /// Returns number of trailing binary zeros in `x`.
     #[allow(dead_code)]
-    pub fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
+    pub fn trailing_zeros<I: Int>(x: I) -> usize
+    where
+        u32: CastFrom<I>,
+        u16: CastFrom<I>,
+        u8: CastFrom<I>,
+    {
         let mut x = x;
         let mut r: u32 = 0;
         let mut t: u32;
 
-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
-            r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
+        const { assert!(I::BITS <= 64) };
+        if I::BITS >= 64 {
+            r += ((u32::cast_from(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
             x >>= r; // remove 32 zero bits
         }
 
-        if T::BITS >= 32 {
-            t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
+        if I::BITS >= 32 {
+            t = ((u16::cast_from(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
             r += t;
             x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
         }
 
-        const { assert!(T::BITS >= 16) };
-        t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
+        const { assert!(I::BITS >= 16) };
+        t = ((u8::cast_from(x) == 0) as u32) << 3;
         x >>= t; // x = [0 - 0xFF] + higher garbage bits
         r += t;
 
-        let mut x: u8 = x.cast();
+        let mut x: u8 = x.cast_lossy();
 
         t = (((x & 0x0F) == 0) as u32) << 2;
         x >>= t; // x = [0 - 0xF] + higher garbage bits
diff --git a/compiler-builtins/src/int/traits.rs b/compiler-builtins/src/int/traits.rs
index 152cb2eee..b474df366 100644
--- a/compiler-builtins/src/int/traits.rs
+++ b/compiler-builtins/src/int/traits.rs
@@ -1,275 +1,4 @@
-use core::ops;
-
-/// Minimal integer implementations needed on all integer types, including wide integers.
-#[allow(dead_code)]
-pub trait MinInt:
-    Copy
-    + core::fmt::Debug
-    + ops::BitOr<Output = Self>
-    + ops::Not<Output = Self>
-    + ops::Shl<u32, Output = Self>
-{
-    /// Type with the same width but other signedness
-    type OtherSign: MinInt;
-    /// Unsigned version of Self
-    type UnsignedInt: MinInt;
-
-    /// If `Self` is a signed integer
-    const SIGNED: bool;
-
-    /// The bitwidth of the int type
-    const BITS: u32;
-
-    const ZERO: Self;
-    const ONE: Self;
-    const MIN: Self;
-    const MAX: Self;
-}
-
-/// Trait for some basic operations on integers
-#[allow(dead_code)]
-pub trait Int:
-    MinInt
-    + PartialEq
-    + PartialOrd
-    + ops::AddAssign
-    + ops::SubAssign
-    + ops::BitAndAssign
-    + ops::BitOrAssign
-    + ops::BitXorAssign
-    + ops::ShlAssign<i32>
-    + ops::ShrAssign<u32>
-    + ops::Add<Output = Self>
-    + ops::Sub<Output = Self>
-    + ops::Mul<Output = Self>
-    + ops::Div<Output = Self>
-    + ops::Shr<u32, Output = Self>
-    + ops::BitXor<Output = Self>
-    + ops::BitAnd<Output = Self>
-{
-    /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
-    /// in `builtins-test`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,
-    /// 111,112,119,120,125,126,127].
-    const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(<Self as MinInt>::BITS);
-
-    /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
-    const FUZZ_NUM: usize = {
-        let log2 = (<Self as MinInt>::BITS - 1).count_ones() as usize;
-        if log2 == 3 {
-            // case for u8
-            6
-        } else {
-            // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
-            // boundaries.
-            8 + (4 * (log2 - 4))
-        }
-    };
-
-    fn unsigned(self) -> Self::UnsignedInt;
-    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
-    fn unsigned_abs(self) -> Self::UnsignedInt;
-
-    fn from_bool(b: bool) -> Self;
-
-    /// Prevents the need for excessive conversions between signed and unsigned
-    fn logical_shr(self, other: u32) -> Self;
-
-    /// Absolute difference between two integers.
-    fn abs_diff(self, other: Self) -> Self::UnsignedInt;
-
-    // copied from primitive integers, but put in a trait
-    fn is_zero(self) -> bool;
-    fn wrapping_neg(self) -> Self;
-    fn wrapping_add(self, other: Self) -> Self;
-    fn wrapping_mul(self, other: Self) -> Self;
-    fn wrapping_sub(self, other: Self) -> Self;
-    fn wrapping_shl(self, other: u32) -> Self;
-    fn wrapping_shr(self, other: u32) -> Self;
-    fn rotate_left(self, other: u32) -> Self;
-    fn overflowing_add(self, other: Self) -> (Self, bool);
-    fn leading_zeros(self) -> u32;
-    fn ilog2(self) -> u32;
-}
-
-pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] {
-    let mut v = [0u8; 20];
-    v[0] = 0;
-    v[1] = 1;
-    v[2] = 2; // important for parity and the iX::MIN case when reversed
-    let mut i = 3;
-
-    // No need for any more until the byte boundary, because there should be no algorithms
-    // that are sensitive to anything not next to byte boundaries after 2. We also scale
-    // in powers of two, which is important to prevent u128 corner tests from getting too
-    // big.
-    let mut l = 8;
-    loop {
-        if l >= ((bits / 2) as u8) {
-            break;
-        }
-        // get both sides of the byte boundary
-        v[i] = l - 1;
-        i += 1;
-        v[i] = l;
-        i += 1;
-        l *= 2;
-    }
-
-    if bits != 8 {
-        // add the lower side of the middle boundary
-        v[i] = ((bits / 2) - 1) as u8;
-        i += 1;
-    }
-
-    // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
-    // boundary because of algorithms that split the high part up. We reverse the scaling
-    // as we go to Self::BITS.
-    let mid = i;
-    let mut j = 1;
-    loop {
-        v[i] = (bits as u8) - (v[mid - j]) - 1;
-        if j == mid {
-            break;
-        }
-        i += 1;
-        j += 1;
-    }
-    v
-}
-
-macro_rules! int_impl_common {
-    ($ty:ty) => {
-        fn from_bool(b: bool) -> Self {
-            b as $ty
-        }
-
-        fn logical_shr(self, other: u32) -> Self {
-            Self::from_unsigned(self.unsigned().wrapping_shr(other))
-        }
-
-        fn is_zero(self) -> bool {
-            self == Self::ZERO
-        }
-
-        fn wrapping_neg(self) -> Self {
-            <Self>::wrapping_neg(self)
-        }
-
-        fn wrapping_add(self, other: Self) -> Self {
-            <Self>::wrapping_add(self, other)
-        }
-
-        fn wrapping_mul(self, other: Self) -> Self {
-            <Self>::wrapping_mul(self, other)
-        }
-        fn wrapping_sub(self, other: Self) -> Self {
-            <Self>::wrapping_sub(self, other)
-        }
-
-        fn wrapping_shl(self, other: u32) -> Self {
-            <Self>::wrapping_shl(self, other)
-        }
-
-        fn wrapping_shr(self, other: u32) -> Self {
-            <Self>::wrapping_shr(self, other)
-        }
-
-        fn rotate_left(self, other: u32) -> Self {
-            <Self>::rotate_left(self, other)
-        }
-
-        fn overflowing_add(self, other: Self) -> (Self, bool) {
-            <Self>::overflowing_add(self, other)
-        }
-
-        fn leading_zeros(self) -> u32 {
-            <Self>::leading_zeros(self)
-        }
-
-        fn ilog2(self) -> u32 {
-            <Self>::ilog2(self)
-        }
-    };
-}
-
-macro_rules! int_impl {
-    ($ity:ty, $uty:ty) => {
-        impl MinInt for $uty {
-            type OtherSign = $ity;
-            type UnsignedInt = $uty;
-
-            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
-            const SIGNED: bool = Self::MIN != Self::ZERO;
-
-            const ZERO: Self = 0;
-            const ONE: Self = 1;
-            const MIN: Self = <Self>::MIN;
-            const MAX: Self = <Self>::MAX;
-        }
-
-        impl Int for $uty {
-            fn unsigned(self) -> $uty {
-                self
-            }
-
-            // It makes writing macros easier if this is implemented for both signed and unsigned
-            #[allow(clippy::wrong_self_convention)]
-            fn from_unsigned(me: $uty) -> Self {
-                me
-            }
-
-            fn unsigned_abs(self) -> Self {
-                self
-            }
-
-            fn abs_diff(self, other: Self) -> Self {
-                self.abs_diff(other)
-            }
-
-            int_impl_common!($uty);
-        }
-
-        impl MinInt for $ity {
-            type OtherSign = $uty;
-            type UnsignedInt = $uty;
-
-            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
-            const SIGNED: bool = Self::MIN != Self::ZERO;
-
-            const ZERO: Self = 0;
-            const ONE: Self = 1;
-            const MIN: Self = <Self>::MIN;
-            const MAX: Self = <Self>::MAX;
-        }
-
-        impl Int for $ity {
-            fn unsigned(self) -> $uty {
-                self as $uty
-            }
-
-            fn from_unsigned(me: $uty) -> Self {
-                me as $ity
-            }
-
-            fn unsigned_abs(self) -> Self::UnsignedInt {
-                self.unsigned_abs()
-            }
-
-            fn abs_diff(self, other: Self) -> $uty {
-                self.abs_diff(other)
-            }
-
-            int_impl_common!($ity);
-        }
-    };
-}
-
-int_impl!(isize, usize);
-int_impl!(i8, u8);
-int_impl!(i16, u16);
-int_impl!(i32, u32);
-int_impl!(i64, u64);
-int_impl!(i128, u128);
+pub use crate::support::{Int, MinInt};
 
 /// Trait for integers twice the bit width of another integer. This is implemented for all
 /// primitives except for `u8`, because there is not a smaller primitive.
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index 3ec1faba1..fa9e06066 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -78,6 +78,7 @@ pub trait Int:
     fn unsigned(self) -> Self::Unsigned;
     fn from_unsigned(unsigned: Self::Unsigned) -> Self;
     fn abs(self) -> Self;
+    fn unsigned_abs(self) -> Self::Unsigned;
 
     fn from_bool(b: bool) -> Self;
 
@@ -203,6 +204,10 @@ macro_rules! int_impl {
                 unimplemented!()
             }
 
+            fn unsigned_abs(self) -> Self {
+                unimplemented!()
+            }
+
             // It makes writing macros easier if this is implemented for both signed and unsigned
             #[allow(clippy::wrong_self_convention)]
             fn from_unsigned(me: $uty) -> Self {
@@ -242,6 +247,10 @@ macro_rules! int_impl {
                 self.abs()
             }
 
+            fn unsigned_abs(self) -> Self::Unsigned {
+                self.unsigned_abs()
+            }
+
             fn from_unsigned(me: $uty) -> Self {
                 me as $ity
             }

From 6c5dd2da0b90289b9f5faf7fc15cf568e2422c9b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 03:51:43 +0000
Subject: [PATCH 033/133] Reuse `libm`'s `CastInto` and `CastFrom` in
 `compiler-builtins`

---
 compiler-builtins/src/float/add.rs          |  2 +-
 compiler-builtins/src/float/conv.rs         |  6 +--
 compiler-builtins/src/float/div.rs          |  2 +-
 compiler-builtins/src/float/mul.rs          |  2 +-
 compiler-builtins/src/float/trunc.rs        |  2 +-
 compiler-builtins/src/int/trailing_zeros.rs |  6 +--
 compiler-builtins/src/int/traits.rs         | 43 +--------------------
 libm/src/math/support/int_traits.rs         |  5 +++
 8 files changed, 16 insertions(+), 52 deletions(-)

diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs
index 43e3ae931..0cc362f70 100644
--- a/compiler-builtins/src/float/add.rs
+++ b/compiler-builtins/src/float/add.rs
@@ -168,7 +168,7 @@ where
     }
 
     // Low three bits are round, guard, and sticky.
-    let a_significand_i32: i32 = a_significand.cast();
+    let a_significand_i32: i32 = a_significand.cast_lossy();
     let round_guard_sticky: i32 = a_significand_i32 & 0x7;
 
     // Shift the significand into place, and mask off the implicit bit.
diff --git a/compiler-builtins/src/float/conv.rs b/compiler-builtins/src/float/conv.rs
index 9d732f2cd..75ea7ce02 100644
--- a/compiler-builtins/src/float/conv.rs
+++ b/compiler-builtins/src/float/conv.rs
@@ -74,7 +74,7 @@ mod int_to_float {
         F::Int: CastFrom<I>,
         Conv: Fn(I::Unsigned) -> F::Int,
     {
-        let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
+        let sign_bit = F::Int::cast_from_lossy(i >> (I::BITS - 1)) << (F::BITS - 1);
         F::from_bits(conv(i.unsigned_abs()) | sign_bit)
     }
 
@@ -166,7 +166,7 @@ mod int_to_float {
 
         // Within the upper `F::BITS`, everything except for the signifcand
         // gets truncated
-        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast();
+        let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast_lossy();
 
         // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
         // check if it is nonzero.
@@ -371,7 +371,7 @@ where
         let m_base = if I::Unsigned::BITS >= F::Int::BITS {
             I::Unsigned::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
         } else {
-            I::Unsigned::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
+            I::Unsigned::cast_from_lossy(fbits >> (F::SIG_BITS - I::BITS + 1))
         };
 
         // Set the implicit 1-bit.
diff --git a/compiler-builtins/src/float/div.rs b/compiler-builtins/src/float/div.rs
index 3e4f0e20d..fc1fc0851 100644
--- a/compiler-builtins/src/float/div.rs
+++ b/compiler-builtins/src/float/div.rs
@@ -482,7 +482,7 @@ where
 
         let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1);
         residual_lo = a_significand
-            .wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast(res_exponent)))
+            .wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast_lossy(res_exponent)))
             .wrapping_sub(ret.wrapping_mul(b_significand) << 1);
         ret
     };
diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs
index c811f1406..dbed3095c 100644
--- a/compiler-builtins/src/float/mul.rs
+++ b/compiler-builtins/src/float/mul.rs
@@ -143,7 +143,7 @@ where
         // a zero of the appropriate sign.  Mathematically there is no need to
         // handle this case separately, but we make it a special case to
         // simplify the shift logic.
-        let shift: u32 = one.wrapping_sub(product_exponent.cast()).cast();
+        let shift: u32 = one.wrapping_sub(product_exponent.cast_lossy()).cast();
         if shift >= bits {
             return F::from_bits(product_sign);
         }
diff --git a/compiler-builtins/src/float/trunc.rs b/compiler-builtins/src/float/trunc.rs
index ca8a0f368..93db5d8bb 100644
--- a/compiler-builtins/src/float/trunc.rs
+++ b/compiler-builtins/src/float/trunc.rs
@@ -50,7 +50,7 @@ where
         // The exponent of a is within the range of normal numbers in the
         // destination format.  We can convert by simply right-shifting with
         // rounding and adjusting the exponent.
-        abs_result = (a_abs >> sig_bits_delta).cast();
+        abs_result = (a_abs >> sig_bits_delta).cast_lossy();
         // Cast before shifting to prevent overflow.
         let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast();
         let tmp = bias_diff << R::SIG_BITS;
diff --git a/compiler-builtins/src/int/trailing_zeros.rs b/compiler-builtins/src/int/trailing_zeros.rs
index 8f63c22c8..1b0ae5b73 100644
--- a/compiler-builtins/src/int/trailing_zeros.rs
+++ b/compiler-builtins/src/int/trailing_zeros.rs
@@ -20,18 +20,18 @@ mod implementation {
 
         const { assert!(I::BITS <= 64) };
         if I::BITS >= 64 {
-            r += ((u32::cast_from(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
+            r += ((u32::cast_from_lossy(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
             x >>= r; // remove 32 zero bits
         }
 
         if I::BITS >= 32 {
-            t = ((u16::cast_from(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
+            t = ((u16::cast_from_lossy(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
             r += t;
             x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
         }
 
         const { assert!(I::BITS >= 16) };
-        t = ((u8::cast_from(x) == 0) as u32) << 3;
+        t = ((u8::cast_from_lossy(x) == 0) as u32) << 3;
         x >>= t; // x = [0 - 0xFF] + higher garbage bits
         r += t;
 
diff --git a/compiler-builtins/src/int/traits.rs b/compiler-builtins/src/int/traits.rs
index b474df366..25b9718ad 100644
--- a/compiler-builtins/src/int/traits.rs
+++ b/compiler-builtins/src/int/traits.rs
@@ -1,4 +1,4 @@
-pub use crate::support::{Int, MinInt};
+pub use crate::support::{CastFrom, CastInto, Int, MinInt};
 
 /// Trait for integers twice the bit width of another integer. This is implemented for all
 /// primitives except for `u8`, because there is not a smaller primitive.
@@ -97,44 +97,3 @@ impl_h_int!(
     i32 u32 i64,
     i64 u64 i128
 );
-
-/// Trait to express (possibly lossy) casting of integers
-pub trait CastInto<T: Copy>: Copy {
-    fn cast(self) -> T;
-}
-
-pub trait CastFrom<T: Copy>: Copy {
-    fn cast_from(value: T) -> Self;
-}
-
-impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
-    fn cast_from(value: U) -> Self {
-        value.cast()
-    }
-}
-
-macro_rules! cast_into {
-    ($ty:ty) => {
-        cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
-    };
-    ($ty:ty; $($into:ty),*) => {$(
-        impl CastInto<$into> for $ty {
-            fn cast(self) -> $into {
-                self as $into
-            }
-        }
-    )*};
-}
-
-cast_into!(usize);
-cast_into!(isize);
-cast_into!(u8);
-cast_into!(i8);
-cast_into!(u16);
-cast_into!(i16);
-cast_into!(u32);
-cast_into!(i32);
-cast_into!(u64);
-cast_into!(i64);
-cast_into!(u128);
-cast_into!(i128);
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index fa9e06066..716af748a 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -374,14 +374,19 @@ impl_h_int!(
 /// Trait to express (possibly lossy) casting of integers
 pub trait CastInto<T: Copy>: Copy {
     /// By default, casts should be exact.
+    #[track_caller]
     fn cast(self) -> T;
 
     /// Call for casts that are expected to truncate.
+    ///
+    /// In practice, this is exactly the same as `cast`; the main difference is to document intent
+    /// in code. `cast` may panic in debug mode.
     fn cast_lossy(self) -> T;
 }
 
 pub trait CastFrom<T: Copy>: Copy {
     /// By default, casts should be exact.
+    #[track_caller]
     fn cast_from(value: T) -> Self;
 
     /// Call for casts that are expected to truncate.

From b5638a3cac2d177cd6fc65a23559e1a8847e8ae0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 28 May 2025 18:15:41 +0000
Subject: [PATCH 034/133] Remove unneeded C symbols

These are now provided by `compiler-builtins`, so there is no need to
also build the C versions. This was detected by checking for duplicate
symbols without excluding weak symbols (CI currently excludes them).
---
 compiler-builtins/build.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index 90d98ec7c..d37fdc5df 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -555,7 +555,6 @@ mod c {
 
         if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
             sources.extend(&[
-                ("__comparetf2", "comparetf2.c"),
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
             ]);
@@ -570,11 +569,11 @@ mod c {
         }
 
         if target.arch == "mips64" {
-            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
+            sources.extend(&[("__fe_getround", "fp_mode.c")]);
         }
 
         if target.arch == "loongarch64" {
-            sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
+            sources.extend(&[("__fe_getround", "fp_mode.c")]);
         }
 
         // Remove the assembly implementations that won't compile for the target

From 9f0cfc24de919d3b1e6b58bb11994f08db3116f5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 19 Apr 2025 07:38:43 +0000
Subject: [PATCH 035/133] Replace the `nm` symbol check with a Rust
 implementation

This should be less error-prone and more adaptable than the `nm` version, and
have better cross-platform support without needing LLVM `nm` installed.
---
 Cargo.toml                      |   1 +
 ci/run.sh                       | 125 +++--------------
 crates/symbol-check/Cargo.toml  |  13 ++
 crates/symbol-check/src/main.rs | 231 ++++++++++++++++++++++++++++++++
 4 files changed, 262 insertions(+), 108 deletions(-)
 create mode 100644 crates/symbol-check/Cargo.toml
 create mode 100644 crates/symbol-check/src/main.rs

diff --git a/Cargo.toml b/Cargo.toml
index b39ec8a25..bc6b4bd29 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ members = [
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
+    "crates/symbol-check",
     "crates/util",
     "libm",
     "libm-test",
diff --git a/ci/run.sh b/ci/run.sh
index 68d13c130..cf3f7dfda 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -47,87 +47,25 @@ else
     fi
 fi
 
-
-declare -a rlib_paths
-
-# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
-update_rlib_paths() {
-    if [ -d /builtins-target ]; then
-        rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
-    else
-        rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
-    fi
-}
-
-# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
-update_rlib_paths
-rm -f "${rlib_paths[@]}"
-
-cargo build -p compiler_builtins --target "$target"
-cargo build -p compiler_builtins --target "$target" --release
-cargo build -p compiler_builtins --target "$target" --features c
-cargo build -p compiler_builtins --target "$target" --features c --release
-cargo build -p compiler_builtins --target "$target" --features no-asm
-cargo build -p compiler_builtins --target "$target" --features no-asm --release
-cargo build -p compiler_builtins --target "$target" --features no-f16-f128
-cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
-
-PREFIX=${target//unknown-/}-
-case "$target" in
-    armv7-*)
-        PREFIX=arm-linux-gnueabihf-
-        ;;
-    thumb*)
-        PREFIX=arm-none-eabi-
-        ;;
-    *86*-*)
-        PREFIX=
-        ;;
-esac
-
-NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
-if [ "$NM" = "" ]; then
-  NM="${PREFIX}nm"
-fi
-
-# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
-# rustup run to ensure that those are in PATH.
-TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
-if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
-  NM="rustup run $TOOLCHAIN $NM"
-fi
-
-# Look out for duplicated symbols when we include the compiler-rt (C) implementation
-update_rlib_paths
-for rlib in "${rlib_paths[@]}"; do
-    set +x
-    echo "================================================================"
-    echo "checking $rlib for duplicate symbols"
-    echo "================================================================"
-    set -x
-    
-    duplicates_found=0
-
-    # NOTE On i586, It's normal that the get_pc_thunk symbol appears several
-    # times so ignore it
-    $NM -g --defined-only "$rlib" 2>&1 |
-      sort |
-      uniq -d |
-      grep -v __x86.get_pc_thunk --quiet |
-      grep 'T __' && duplicates_found=1
-
-    if [ "$duplicates_found" != 0 ]; then
-        echo "error: found duplicate symbols"
-        exit 1
-    else
-        echo "success; no duplicate symbols found"
-    fi
-done
-
-rm -f "${rlib_paths[@]}"
+# Ensure there are no duplicate symbols or references to `core` when
+# `compiler-builtins` is built with various features. Symcheck invokes Cargo to
+# build with the arguments we provide it, then validates the built artifacts.
+symcheck=(cargo run -p symbol-check --release)
+[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
+symcheck+=(-- build-and-check)
+
+"${symcheck[@]}" -p compiler_builtins --target "$target"
+"${symcheck[@]}" -p compiler_builtins --target "$target" --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
 
 build_intrinsics_test() {
-    cargo build \
+    # symcheck also checks the results of builtins-test-intrinsics
+    "${symcheck[@]}" \
         --target "$target" --verbose \
         --manifest-path builtins-test-intrinsics/Cargo.toml "$@"
 }
@@ -143,35 +81,6 @@ build_intrinsics_test --features c --release
 CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
 CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
 
-# Ensure no references to any symbols from core
-update_rlib_paths
-for rlib in "${rlib_paths[@]}"; do
-    set +x
-    echo "================================================================"
-    echo "checking $rlib for references to core"
-    echo "================================================================"
-    set -x
-
-    tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
-    test -d "$tmpdir" || mkdir "$tmpdir"
-    defined="$tmpdir/defined_symbols.txt"
-    undefined="$tmpdir/defined_symbols.txt"
-
-    $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
-    $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
-    grep_has_results=0
-    grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1
-
-    if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
-        echo "FIXME: powerpc64 fails these tests"
-    elif [ "$grep_has_results" != 0 ]; then
-        echo "error: found unexpected references to core"
-        exit 1
-    else
-        echo "success; no references to core found"
-    fi
-done
-
 # Test libm
 
 # Make sure a simple build works
diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml
new file mode 100644
index 000000000..30969ee40
--- /dev/null
+++ b/crates/symbol-check/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "symbol-check"
+version = "0.1.0"
+edition = "2024"
+publish = false
+
+[dependencies]
+# FIXME: used as a git dependency since the latest release does not support wasm
+object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
+serde_json = "1.0.140"
+
+[features]
+wasm = ["object/wasm"]
diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
new file mode 100644
index 000000000..104505438
--- /dev/null
+++ b/crates/symbol-check/src/main.rs
@@ -0,0 +1,231 @@
+//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
+//! linking errors.
+
+use std::collections::{BTreeMap, BTreeSet};
+use std::fs;
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+
+use object::read::archive::{ArchiveFile, ArchiveMember};
+use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
+use serde_json::Value;
+
+const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
+const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
+
+const USAGE: &str = "Usage:
+
+    symbol-check build-and-check CARGO_ARGS ...
+
+Cargo will get invoked with `CARGO_ARGS` and all output
+`compiler_builtins*.rlib` files will be checked.
+";
+
+fn main() {
+    // Create a `&str` vec so we can match on it.
+    let args = std::env::args().collect::<Vec<_>>();
+    let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
+
+    match &args_ref[1..] {
+        ["build-and-check", rest @ ..] if !rest.is_empty() => {
+            let paths = exec_cargo_with_args(rest);
+            for path in paths {
+                println!("Checking {}", path.display());
+                verify_no_duplicates(&path);
+                verify_core_symbols(&path);
+            }
+        }
+        _ => {
+            println!("{USAGE}");
+            std::process::exit(1);
+        }
+    }
+}
+
+/// Run `cargo build` with the provided additional arguments, collecting the list of created
+/// libraries.
+fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
+    let mut cmd = Command::new("cargo")
+        .arg("build")
+        .arg("--message-format=json")
+        .args(args)
+        .stdout(Stdio::piped())
+        .spawn()
+        .expect("failed to launch Cargo");
+
+    let stdout = cmd.stdout.take().unwrap();
+    let reader = BufReader::new(stdout);
+    let mut check_files = Vec::new();
+
+    for line in reader.lines() {
+        let line = line.expect("failed to read line");
+        println!("{line}"); // tee to stdout
+
+        // Select only steps that create files
+        let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
+        if j["reason"] != "compiler-artifact" {
+            continue;
+        }
+
+        // Find rlibs in the created file list that match our expected library names and
+        // extensions.
+        for fpath in j["filenames"].as_array().expect("filenames not an array") {
+            let path = fpath.as_str().expect("file name not a string");
+            let path = PathBuf::from(path);
+
+            if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
+                let fname = path.file_name().unwrap().to_str().unwrap();
+
+                if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
+                    check_files.push(path);
+                }
+            }
+        }
+    }
+
+    cmd.wait().expect("failed to wait on Cargo");
+
+    assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
+    println!("Collected the following rlibs to check: {check_files:#?}");
+
+    check_files
+}
+
+/// Information collected from `object`, for convenience.
+#[expect(unused)] // only for printing
+#[derive(Clone, Debug)]
+struct SymInfo {
+    name: String,
+    kind: SymbolKind,
+    scope: SymbolScope,
+    section: SymbolSection,
+    is_undefined: bool,
+    is_global: bool,
+    is_local: bool,
+    is_weak: bool,
+    is_common: bool,
+    address: u64,
+    object: String,
+}
+
+impl SymInfo {
+    fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
+        Self {
+            name: sym.name().expect("missing name").to_owned(),
+            kind: sym.kind(),
+            scope: sym.scope(),
+            section: sym.section(),
+            is_undefined: sym.is_undefined(),
+            is_global: sym.is_global(),
+            is_local: sym.is_local(),
+            is_weak: sym.is_weak(),
+            is_common: sym.is_common(),
+            address: sym.address(),
+            object: String::from_utf8_lossy(member.name()).into_owned(),
+        }
+    }
+}
+
+/// Ensure that the same global symbol isn't defined in multiple object files within an archive.
+///
+/// Note that this will also locate cases where a symbol is weakly defined in more than one place.
+/// Technically there are no linker errors that will come from this, but it keeps our binary more
+/// straightforward and saves some distribution size.
+fn verify_no_duplicates(path: &Path) {
+    let mut syms = BTreeMap::<String, SymInfo>::new();
+    let mut dups = Vec::new();
+    let mut found_any = false;
+
+    for_each_symbol(path, |symbol, member| {
+        // Only check defined globals
+        if !symbol.is_global() || symbol.is_undefined() {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+
+        // x86-32 includes multiple copies of thunk symbols
+        if sym.name.starts_with("__x86.get_pc_thunk") {
+            return;
+        }
+
+        // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
+        // relocations. These are allowed to have repeated definitions.
+        let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
+        if win_allowed_dup_pfx
+            .iter()
+            .any(|pfx| sym.name.starts_with(pfx))
+        {
+            return;
+        }
+
+        match syms.get(&sym.name) {
+            Some(existing) => {
+                dups.push(sym);
+                dups.push(existing.clone());
+            }
+            None => {
+                syms.insert(sym.name.clone(), sym);
+            }
+        }
+
+        found_any = true;
+    });
+
+    assert!(found_any, "no symbols found");
+
+    if !dups.is_empty() {
+        dups.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found duplicate symbols: {dups:#?}");
+    }
+
+    println!("    success: no duplicate symbols found");
+}
+
+/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
+fn verify_core_symbols(path: &Path) {
+    let mut defined = BTreeSet::new();
+    let mut undefined = Vec::new();
+    let mut has_symbols = false;
+
+    for_each_symbol(path, |symbol, member| {
+        has_symbols = true;
+
+        // Find only symbols from `core`
+        if !symbol.name().unwrap().contains("_ZN4core") {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+        if sym.is_undefined {
+            undefined.push(sym);
+        } else {
+            defined.insert(sym.name);
+        }
+    });
+
+    assert!(has_symbols, "no symbols found");
+
+    // Discard any symbols that are defined somewhere in the archive
+    undefined.retain(|sym| !defined.contains(&sym.name));
+
+    if !undefined.is_empty() {
+        undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found undefined symbols from core: {undefined:#?}");
+    }
+
+    println!("    success: no undefined references to core found");
+}
+
+/// For a given archive path, do something with each symbol.
+fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+    let data = fs::read(path).expect("reading file failed");
+    let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
+    for member in archive.members() {
+        let member = member.expect("failed to access member");
+        let obj_data = member.data(&*data).expect("failed to access object");
+        let obj = object::File::parse(obj_data).expect("failed to parse object");
+        obj.symbols().for_each(|sym| f(sym, &member));
+    }
+}

From bfd4058825e0002b0369f105467a93e8290969c5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 28 May 2025 19:59:16 +0000
Subject: [PATCH 036/133] Remove the now-unneeded llvm-tools-preview

Since a working `nm` is no longer needed as part of CI, the rustup
component can be removed.
---
 .github/workflows/main.yaml     | 1 -
 crates/symbol-check/src/main.rs | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index d13dd6b0f..567ad1205 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -119,7 +119,6 @@ jobs:
         rustup update "$channel" --no-self-update
         rustup default "$channel"
         rustup target add "${{ matrix.target }}"
-        rustup component add llvm-tools-preview
     - uses: taiki-e/install-action@nextest
     - uses: Swatinem/rust-cache@v2
       with:
diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index 104505438..4e6417fdf 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -84,7 +84,7 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
         }
     }
 
-    cmd.wait().expect("failed to wait on Cargo");
+    assert!(cmd.wait().expect("failed to wait on Cargo").success());
 
     assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
     println!("Collected the following rlibs to check: {check_files:#?}");

From 9bd702d704c81758b991a21636b857acd58a6a04 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 15:40:05 +0000
Subject: [PATCH 037/133] Change `compiler-builtins` to edition 2024

Do the same for `builtins-test-intrinsics`. Mostly this means updating
`extern` to `unsafe extern`, and fixing a few new Clippy lints.
---
 builtins-test-intrinsics/Cargo.toml              |  2 +-
 builtins-test-intrinsics/src/main.rs             |  6 ++++--
 builtins-test/tests/aeabi_memclr.rs              |  3 ++-
 builtins-test/tests/aeabi_memcpy.rs              |  3 ++-
 builtins-test/tests/aeabi_memset.rs              |  3 ++-
 compiler-builtins/Cargo.toml                     |  2 +-
 compiler-builtins/src/arm.rs                     |  7 +++++--
 .../src/int/specialized_div_rem/mod.rs           | 16 ++++++++--------
 compiler-builtins/src/macros.rs                  |  4 ++--
 compiler-builtins/src/probestack.rs              |  4 +++-
 10 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml
index 6e10628a4..704de20c5 100644
--- a/builtins-test-intrinsics/Cargo.toml
+++ b/builtins-test-intrinsics/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "builtins-test-intrinsics"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 publish = false
 license = "MIT OR Apache-2.0"
 
diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index 1fa7b0091..96fe4a738 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -15,9 +15,10 @@
 
 extern crate panic_handler;
 
+// SAFETY: no definitions, only used for linking
 #[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))]
 #[link(name = "c")]
-extern "C" {}
+unsafe extern "C" {}
 
 // Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform
 // doesn't have native support for the operation used in the function. ARM has a naming convention
@@ -663,10 +664,11 @@ pub fn _start() -> ! {
     loop {}
 }
 
+// SAFETY: no definitions, only used for linking
 #[cfg(windows)]
 #[link(name = "kernel32")]
 #[link(name = "msvcrt")]
-extern "C" {}
+unsafe extern "C" {}
 
 // ARM targets need these symbols
 #[unsafe(no_mangle)]
diff --git a/builtins-test/tests/aeabi_memclr.rs b/builtins-test/tests/aeabi_memclr.rs
index bfd15a391..0761feaff 100644
--- a/builtins-test/tests/aeabi_memclr.rs
+++ b/builtins-test/tests/aeabi_memclr.rs
@@ -24,7 +24,8 @@ macro_rules! panic {
     };
 }
 
-extern "C" {
+// SAFETY: defined in  compiler-builtins
+unsafe extern "aapcs" {
     fn __aeabi_memclr4(dest: *mut u8, n: usize);
     fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
 }
diff --git a/builtins-test/tests/aeabi_memcpy.rs b/builtins-test/tests/aeabi_memcpy.rs
index c892c5aba..e76e712a2 100644
--- a/builtins-test/tests/aeabi_memcpy.rs
+++ b/builtins-test/tests/aeabi_memcpy.rs
@@ -22,7 +22,8 @@ macro_rules! panic {
     };
 }
 
-extern "C" {
+// SAFETY: defined in  compiler-builtins
+unsafe extern "aapcs" {
     fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
     fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
 }
diff --git a/builtins-test/tests/aeabi_memset.rs b/builtins-test/tests/aeabi_memset.rs
index 34ab3acc7..8f9f80f96 100644
--- a/builtins-test/tests/aeabi_memset.rs
+++ b/builtins-test/tests/aeabi_memset.rs
@@ -24,7 +24,8 @@ macro_rules! panic {
     };
 }
 
-extern "C" {
+// SAFETY: defined in  compiler-builtins
+unsafe extern "aapcs" {
     fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
 }
 
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index d65a22152..93eb3e01b 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -7,7 +7,7 @@ readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
 homepage = "https://github.com/rust-lang/compiler-builtins"
 documentation = "https://docs.rs/compiler_builtins"
-edition = "2021"
+edition = "2024"
 description = "Compiler intrinsics used by the Rust compiler."
 links = "compiler-rt"
 
diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index a9107e3cd..a7d84e49b 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -1,13 +1,16 @@
 #![cfg(not(feature = "no-asm"))]
 
 // Interfaces used by naked trampolines.
-extern "C" {
+// SAFETY: these are defined in compiler-builtins
+unsafe extern "C" {
     fn __udivmodsi4(a: u32, b: u32, rem: *mut u32) -> u32;
     fn __udivmoddi4(a: u64, b: u64, rem: *mut u64) -> u64;
     fn __divmoddi4(a: i64, b: i64, rem: *mut i64) -> i64;
 }
 
-extern "aapcs" {
+// SAFETY: these are defined in compiler-builtins
+// FIXME(extern_custom), this isn't always the correct ABI
+unsafe extern "aapcs" {
     // AAPCS is not always the correct ABI for these intrinsics, but we only use this to
     // forward another `__aeabi_` call so it doesn't matter.
     fn __aeabi_idiv(a: i32, b: i32) -> i32;
diff --git a/compiler-builtins/src/int/specialized_div_rem/mod.rs b/compiler-builtins/src/int/specialized_div_rem/mod.rs
index 43f466e75..7841e4f33 100644
--- a/compiler-builtins/src/int/specialized_div_rem/mod.rs
+++ b/compiler-builtins/src/int/specialized_div_rem/mod.rs
@@ -125,10 +125,10 @@ impl_normalization_shift!(
 /// dependencies.
 #[inline]
 fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
-    if let Some(quo) = duo.checked_div(div) {
-        if let Some(rem) = duo.checked_rem(div) {
-            return (quo, rem);
-        }
+    if let Some(quo) = duo.checked_div(div)
+        && let Some(rem) = duo.checked_rem(div)
+    {
+        return (quo, rem);
     }
     zero_div_fn()
 }
@@ -227,10 +227,10 @@ impl_asymmetric!(
 #[inline]
 #[allow(dead_code)]
 fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
-    if let Some(quo) = duo.checked_div(div) {
-        if let Some(rem) = duo.checked_rem(div) {
-            return (quo, rem);
-        }
+    if let Some(quo) = duo.checked_div(div)
+        && let Some(rem) = duo.checked_rem(div)
+    {
+        return (quo, rem);
     }
     zero_div_fn()
 }
diff --git a/compiler-builtins/src/macros.rs b/compiler-builtins/src/macros.rs
index 22e0dd27f..203cd0949 100644
--- a/compiler-builtins/src/macros.rs
+++ b/compiler-builtins/src/macros.rs
@@ -132,7 +132,7 @@ macro_rules! intrinsics {
     ) => (
         #[cfg($name = "optimized-c")]
         pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
-            extern $abi {
+            unsafe extern $abi {
                 fn $name($($argname: $ty),*) $(-> $ret)?;
             }
             unsafe {
@@ -435,7 +435,7 @@ macro_rules! intrinsics {
         pub mod $name {
             #[unsafe(naked)]
             $(#[$($attr)*])*
-            #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
+            #[cfg_attr(not(feature = "mangled-names"), unsafe(no_mangle))]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
                 $($body)*
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index 5b6abd21a..c9070cf55 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -49,7 +49,9 @@
 // We only define stack probing for these architectures today.
 #![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 
-extern "C" {
+// SAFETY: defined in this module.
+// FIXME(extern_custom): the ABI is not correct.
+unsafe extern "C" {
     pub fn __rust_probestack();
 }
 

From af81023a308ab9742bd475c86884efd3fc4bda1a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 16:07:54 +0000
Subject: [PATCH 038/133] symcheck: Print the command to make reproducing
 errors easier

---
 crates/symbol-check/src/main.rs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index 4e6417fdf..d83cd318d 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -46,15 +46,16 @@ fn main() {
 /// Run `cargo build` with the provided additional arguments, collecting the list of created
 /// libraries.
 fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
-    let mut cmd = Command::new("cargo")
-        .arg("build")
+    let mut cmd = Command::new("cargo");
+    cmd.arg("build")
         .arg("--message-format=json")
         .args(args)
-        .stdout(Stdio::piped())
-        .spawn()
-        .expect("failed to launch Cargo");
+        .stdout(Stdio::piped());
 
-    let stdout = cmd.stdout.take().unwrap();
+    println!("running: {cmd:?}");
+    let mut child = cmd.spawn().expect("failed to launch Cargo");
+
+    let stdout = child.stdout.take().unwrap();
     let reader = BufReader::new(stdout);
     let mut check_files = Vec::new();
 
@@ -84,7 +85,7 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
         }
     }
 
-    assert!(cmd.wait().expect("failed to wait on Cargo").success());
+    assert!(child.wait().expect("failed to wait on Cargo").success());
 
     assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
     println!("Collected the following rlibs to check: {check_files:#?}");

From f5449b0fd4628e4a04b6e37ac2394ceac35dd8e1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 17:37:35 +0000
Subject: [PATCH 039/133] Add benchmarks for float parsing and printing

As part of this, the u256 benchmarks are reorganized into a single group.
---
 libm-test/benches/icount.rs | 100 ++++++++++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 17 deletions(-)

diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index da8c6bfd1..4bebbc41c 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -1,9 +1,11 @@
 //! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+#![feature(f16)]
+#![feature(f128)]
 
 use std::hint::black_box;
 
 use iai_callgrind::{library_benchmark, library_benchmark_group, main};
-use libm::support::{HInt, u256};
+use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256};
 use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
 
@@ -109,11 +111,6 @@ fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
     }
 }
 
-library_benchmark_group!(
-    name = icount_bench_u128_widen_mul_group;
-    benchmarks = icount_bench_u128_widen_mul
-);
-
 #[library_benchmark]
 #[bench::linspace(setup_u256_add())]
 fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
@@ -122,11 +119,6 @@ fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
     }
 }
 
-library_benchmark_group!(
-    name = icount_bench_u256_add_group;
-    benchmarks = icount_bench_u256_add
-);
-
 #[library_benchmark]
 #[bench::linspace(setup_u256_shift())]
 fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
@@ -136,16 +128,90 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 }
 
 library_benchmark_group!(
-    name = icount_bench_u256_shr_group;
-    benchmarks = icount_bench_u256_shr
+    name = icount_bench_u128_group;
+    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_shr
+);
+
+#[library_benchmark]
+#[bench::short("0x12.34p+8")]
+#[bench::max("0x1.ffcp+15")]
+fn icount_bench_hf16(s: &str) -> f16 {
+    black_box(hf16(s))
+}
+
+#[library_benchmark]
+#[bench::short("0x12.34p+8")]
+#[bench::max("0x1.fffffep+127")]
+fn icount_bench_hf32(s: &str) -> f32 {
+    black_box(hf32(s))
+}
+
+#[library_benchmark]
+#[bench::short("0x12.34p+8")]
+#[bench::max("0x1.fffffffffffffp+1023")]
+fn icount_bench_hf64(s: &str) -> f64 {
+    black_box(hf64(s))
+}
+
+#[library_benchmark]
+#[bench::short("0x12.34p+8")]
+#[bench::max("0x1.ffffffffffffffffffffffffffffp+16383")]
+fn icount_bench_hf128(s: &str) -> f128 {
+    black_box(hf128(s))
+}
+
+library_benchmark_group!(
+    name = icount_bench_hf_parse_group;
+    benchmarks =
+    icount_bench_hf16,
+    icount_bench_hf32,
+    icount_bench_hf64,
+    icount_bench_hf128
+);
+
+#[library_benchmark]
+#[bench::short(1.015625)]
+#[bench::max(f16::MAX)]
+fn icount_bench_print_hf16(x: f16) -> String {
+    black_box(Hexf(x).to_string())
+}
+
+#[library_benchmark]
+#[bench::short(1.015625)]
+#[bench::max(f32::MAX)]
+fn icount_bench_print_hf32(x: f32) -> String {
+    black_box(Hexf(x).to_string())
+}
+
+#[library_benchmark]
+#[bench::short(1.015625)]
+#[bench::max(f64::MAX)]
+fn icount_bench_print_hf64(x: f64) -> String {
+    black_box(Hexf(x).to_string())
+}
+
+#[library_benchmark]
+#[bench::short(1.015625)]
+#[bench::max(f128::MAX)]
+fn icount_bench_print_hf128(x: f128) -> String {
+    black_box(Hexf(x).to_string())
+}
+
+library_benchmark_group!(
+    name = icount_bench_hf_print_group;
+    benchmarks =
+    icount_bench_print_hf16,
+    icount_bench_print_hf32,
+    icount_bench_print_hf64,
+    icount_bench_print_hf128
 );
 
 main!(
     library_benchmark_groups =
-    // u256-related benchmarks
-    icount_bench_u128_widen_mul_group,
-    icount_bench_u256_add_group,
-    icount_bench_u256_shr_group,
+    // Benchmarks not related to public libm math
+    icount_bench_u128_group,
+    icount_bench_hf_parse_group,
+    icount_bench_hf_print_group,
     // verify-apilist-start
     // verify-sorted-start
     icount_bench_acos_group,

From b76f6cc5e5567d86d23280a06dc27c82403e6388 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 19 Mar 2025 04:10:03 +0000
Subject: [PATCH 040/133] Run `builtins-test-intrinsics` when possible

Currently we only build this, but it is possible to run the binary.
Change the CI script to do so here.
---
 builtins-test-intrinsics/src/main.rs |  6 ++++--
 ci/run.sh                            | 30 ++++++++++++++++++----------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index 96fe4a738..66744a081 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -13,6 +13,8 @@
 #![no_std]
 #![no_main]
 
+// Ensure this `compiler_builtins` gets used, rather than the version injected from the sysroot.
+extern crate compiler_builtins;
 extern crate panic_handler;
 
 // SAFETY: no definitions, only used for linking
@@ -652,14 +654,14 @@ fn something_with_a_dtor(f: &dyn Fn()) {
 
 #[unsafe(no_mangle)]
 #[cfg(not(thumb))]
-fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int {
+extern "C" fn main(_argc: core::ffi::c_int, _argv: *const *const u8) -> core::ffi::c_int {
     run();
     0
 }
 
 #[unsafe(no_mangle)]
 #[cfg(thumb)]
-pub fn _start() -> ! {
+extern "C" fn _start() -> ! {
     run();
     loop {}
 }
diff --git a/ci/run.sh b/ci/run.sh
index cf3f7dfda..27b9686ea 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -63,23 +63,33 @@ symcheck+=(-- build-and-check)
 "${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
 "${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
 
-build_intrinsics_test() {
-    # symcheck also checks the results of builtins-test-intrinsics
-    "${symcheck[@]}" \
+run_intrinsics_test() {
+    args=(
         --target "$target" --verbose \
-        --manifest-path builtins-test-intrinsics/Cargo.toml "$@"
+        --manifest-path builtins-test-intrinsics/Cargo.toml
+    )
+    args+=( "$@" )
+
+    # symcheck also checks the results of builtins-test-intrinsics
+    "${symcheck[@]}" "${args[@]}"
+
+    # FIXME: we get access violations on Windows, our entrypoint may need to
+    # be tweaked.
+    if [ "${BUILD_ONLY:-}" != "1" ] && ! [[ "$target" = *"windows"* ]]; then
+        cargo run "${args[@]}"
+    fi
 }
 
 # Verify that we haven't dropped any intrinsics/symbols
-build_intrinsics_test
-build_intrinsics_test --release
-build_intrinsics_test --features c
-build_intrinsics_test --features c --release
+run_intrinsics_test
+run_intrinsics_test --release
+run_intrinsics_test --features c
+run_intrinsics_test --features c --release
 
 # Verify that there are no undefined symbols to `panic` within our
 # implementations
-CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
-CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
+CARGO_PROFILE_DEV_LTO=true run_intrinsics_test
+CARGO_PROFILE_RELEASE_LTO=true run_intrinsics_test --release
 
 # Test libm
 

From 11cf244b8ef7da89607eba968ddfd5dd70cdfd92 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 19:01:03 +0000
Subject: [PATCH 041/133] ci: Allow concurrency outside of pull requests

When multiple merges to `master` happen before a CI run completes, the
in-progress job is getting canceled. Fix this by using the commit sha
for the group key if a pull request number is not available, rather than
`github.ref` (which is always `refs/heads/master` after merge). This
should prevent jobs running on previous commits from getting canceled,
while still ensuring there is only ever one active run per pull request.
---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 567ad1205..de433d8c7 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -5,7 +5,7 @@ on:
 
 concurrency:
   # Make sure that new pushes cancel running jobs
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
   cancel-in-progress: true
 
 env:

From 7f3731187f56d257c8aa4fc945c98221e7f28b23 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 18:06:43 +0000
Subject: [PATCH 042/133] Increase the benchmark rustc version to 2025-05-28

We may soon want to use some new nightly features in `compiler-builtins`
and `libm`, specifically `cfg_target_has_reliable_f16_f128` which was
added in the past few weeks. This will mean we need a newer toolchain
for benchmarks to continue building.

Bump to the current latest nightly so we are not blocked on this down
the line.
---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index de433d8c7..8e89cb472 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -13,7 +13,7 @@ env:
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
   RUST_BACKTRACE: full
-  BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducable results
+  BENCHMARK_RUSTC: nightly-2025-05-28 # Pin the toolchain for reproducable results
 
 jobs:
   # Determine which tests should be run based on changed files.

From 502a1149bc555a5ad58fb26dec0cca5404d24354 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 20:53:48 +0000
Subject: [PATCH 043/133] libm-test: Make `extensive` an attribute rather than
 a test type

Currently we run logspace tests for extensive tests, but there isn't any
reason we couldn't also run more kinds of tests more extensively (e.g.
more edge cases, combine edge cases with logspace for multi-input
functions, etc). As a first step toward making this possible, make
`extensive` a new field in `CheckCtx`, and rename `QuickSpaced` to
`Spaced`.
---
 libm-test/benches/icount.rs           |  2 +-
 libm-test/examples/plot_domains.rs    |  2 +-
 libm-test/src/run_cfg.rs              | 74 ++++++++++++++++++++-------
 libm-test/tests/compare_built_musl.rs |  2 +-
 libm-test/tests/multiprecision.rs     |  2 +-
 libm-test/tests/z_extensive/run.rs    |  3 +-
 6 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index 4bebbc41c..a0928a29f 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -23,7 +23,7 @@ macro_rules! icount_benches {
                 let mut ctx = CheckCtx::new(
                     Op::IDENTIFIER,
                     CheckBasis::None,
-                    GeneratorKind::QuickSpaced
+                    GeneratorKind::Spaced
                 );
                 ctx.override_iterations(BENCH_ITER_ITEMS);
                 let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
diff --git a/libm-test/examples/plot_domains.rs b/libm-test/examples/plot_domains.rs
index 3563103b8..7331d454f 100644
--- a/libm-test/examples/plot_domains.rs
+++ b/libm-test/examples/plot_domains.rs
@@ -55,7 +55,7 @@ where
     Op: MathOp<FTy = f32, RustArgs = (f32,)>,
     Op::RustArgs: SpacedInput<Op>,
 {
-    let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::QuickSpaced);
+    let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::Spaced);
     plot_one_generator(
         out_dir,
         &ctx,
diff --git a/libm-test/src/run_cfg.rs b/libm-test/src/run_cfg.rs
index 3345a01d2..90f81195c 100644
--- a/libm-test/src/run_cfg.rs
+++ b/libm-test/src/run_cfg.rs
@@ -22,13 +22,38 @@ static EXTENSIVE_ITER_OVERRIDE: LazyLock<Option<u64>> = LazyLock::new(|| {
 
 /// Specific tests that need to have a reduced amount of iterations to complete in a reasonable
 /// amount of time.
-///
-/// Contains the itentifier+generator combo to match on, plus the factor to reduce by.
-const EXTEMELY_SLOW_TESTS: &[(Identifier, GeneratorKind, u64)] = &[
-    (Identifier::Fmodf128, GeneratorKind::QuickSpaced, 50),
-    (Identifier::Fmodf128, GeneratorKind::Extensive, 50),
+const EXTREMELY_SLOW_TESTS: &[SlowTest] = &[
+    SlowTest {
+        ident: Identifier::Fmodf128,
+        gen_kind: GeneratorKind::Spaced,
+        extensive: false,
+        reduce_factor: 50,
+    },
+    SlowTest {
+        ident: Identifier::Fmodf128,
+        gen_kind: GeneratorKind::Spaced,
+        extensive: true,
+        reduce_factor: 50,
+    },
 ];
 
+/// A pattern to match a `CheckCtx`, plus a factor to reduce by.
+struct SlowTest {
+    ident: Identifier,
+    gen_kind: GeneratorKind,
+    extensive: bool,
+    reduce_factor: u64,
+}
+
+impl SlowTest {
+    /// True if the test in `CheckCtx` should be reduced by `reduce_factor`.
+    fn matches_ctx(&self, ctx: &CheckCtx) -> bool {
+        self.ident == ctx.fn_ident
+            && self.gen_kind == ctx.gen_kind
+            && self.extensive == ctx.extensive
+    }
+}
+
 /// Maximum number of iterations to run for a single routine.
 ///
 /// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines
@@ -54,6 +79,7 @@ pub struct CheckCtx {
     /// Source of truth for tests.
     pub basis: CheckBasis,
     pub gen_kind: GeneratorKind,
+    pub extensive: bool,
     /// If specified, this value will override the value returned by [`iteration_count`].
     pub override_iterations: Option<u64>,
 }
@@ -69,12 +95,19 @@ impl CheckCtx {
             base_name_str: fn_ident.base_name().as_str(),
             basis,
             gen_kind,
+            extensive: false,
             override_iterations: None,
         };
         ret.ulp = crate::default_ulp(&ret);
         ret
     }
 
+    /// Configure that this is an extensive test.
+    pub fn extensive(mut self, extensive: bool) -> Self {
+        self.extensive = extensive;
+        self
+    }
+
     /// The number of input arguments for this function.
     pub fn input_count(&self) -> usize {
         self.fn_ident.math_op().rust_sig.args.len()
@@ -100,14 +133,17 @@ pub enum CheckBasis {
 /// and quantity.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum GeneratorKind {
+    /// Extremes, zeros, nonstandard numbers, etc.
     EdgeCases,
-    Extensive,
-    QuickSpaced,
+    /// Spaced by logarithm (floats) or linear (integers).
+    Spaced,
+    /// Test inputs from an RNG.
     Random,
+    /// A provided test case list.
     List,
 }
 
-/// A list of all functions that should get extensive tests.
+/// A list of all functions that should get extensive tests, as configured by environment variable.
 ///
 /// This also supports the special test name `all` to run all tests, as well as `all_f16`,
 /// `all_f32`, `all_f64`, and `all_f128` to run all tests for a specific float type.
@@ -216,17 +252,17 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     let random_iter_count = domain_iter_count / 100;
 
     let mut total_iterations = match ctx.gen_kind {
-        GeneratorKind::QuickSpaced => domain_iter_count,
+        GeneratorKind::Spaced if ctx.extensive => extensive_max_iterations(),
+        GeneratorKind::Spaced => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
-        GeneratorKind::Extensive => extensive_max_iterations(),
         GeneratorKind::EdgeCases | GeneratorKind::List => {
             unimplemented!("shoudn't need `iteration_count` for {:?}", ctx.gen_kind)
         }
     };
 
     // Larger float types get more iterations.
-    if t_env.large_float_ty && ctx.gen_kind != GeneratorKind::Extensive {
-        if ctx.gen_kind == GeneratorKind::Extensive {
+    if t_env.large_float_ty {
+        if ctx.extensive {
             // Extensive already has a pretty high test count.
             total_iterations *= 2;
         } else {
@@ -244,13 +280,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     }
 
     // Some tests are significantly slower than others and need to be further reduced.
-    if let Some((_id, _gen, scale)) = EXTEMELY_SLOW_TESTS
+    if let Some(slow) = EXTREMELY_SLOW_TESTS
         .iter()
-        .find(|(id, generator, _scale)| *id == ctx.fn_ident && *generator == ctx.gen_kind)
+        .find(|slow| slow.matches_ctx(ctx))
     {
         // However, do not override if the extensive iteration count has been manually set.
-        if !(ctx.gen_kind == GeneratorKind::Extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) {
-            total_iterations /= scale;
+        if !(ctx.extensive && EXTENSIVE_ITER_OVERRIDE.is_some()) {
+            total_iterations /= slow.reduce_factor;
         }
     }
 
@@ -279,7 +315,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
     let seed_msg = match ctx.gen_kind {
-        GeneratorKind::QuickSpaced | GeneratorKind::Extensive => String::new(),
+        GeneratorKind::Spaced => String::new(),
         GeneratorKind::Random => {
             format!(
                 " using `{SEED_ENV}={}`",
@@ -327,8 +363,8 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
     let extensive_range = (-0xfff)..=0xfffff;
 
     match ctx.gen_kind {
-        GeneratorKind::Extensive => extensive_range,
-        GeneratorKind::QuickSpaced | GeneratorKind::Random => non_extensive_range,
+        _ if ctx.extensive => extensive_range,
+        GeneratorKind::Spaced | GeneratorKind::Random => non_extensive_range,
         GeneratorKind::EdgeCases => extensive_range,
         GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind),
     }
diff --git a/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs
index 6ccbb6f4c..86f3b8b71 100644
--- a/libm-test/tests/compare_built_musl.rs
+++ b/libm-test/tests/compare_built_musl.rs
@@ -65,7 +65,7 @@ macro_rules! musl_tests {
             $(#[$attr])*
             fn [< musl_quickspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced);
                 let cases = spaced::get_test_cases::<Op>(&ctx).0;
                 musl_runner::<Op>(&ctx, cases, musl_math_sys::$fn_name);
             }
diff --git a/libm-test/tests/multiprecision.rs b/libm-test/tests/multiprecision.rs
index 80b2c7868..60175ae61 100644
--- a/libm-test/tests/multiprecision.rs
+++ b/libm-test/tests/multiprecision.rs
@@ -55,7 +55,7 @@ macro_rules! mp_tests {
             $(#[$attr])*
             fn [< mp_quickspace_ $fn_name >]() {
                 type Op = libm_test::op::$fn_name::Routine;
-                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::QuickSpaced);
+                let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced);
                 let cases = spaced::get_test_cases::<Op>(&ctx).0;
                 mp_runner::<Op>(&ctx, cases);
             }
diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs
index 59c806ce7..f2ba6a4a0 100644
--- a/libm-test/tests/z_extensive/run.rs
+++ b/libm-test/tests/z_extensive/run.rs
@@ -17,7 +17,6 @@ use rayon::prelude::*;
 use spaced::SpacedInput;
 
 const BASIS: CheckBasis = CheckBasis::Mpfr;
-const GEN_KIND: GeneratorKind = GeneratorKind::Extensive;
 
 /// Run the extensive test suite.
 pub fn run() {
@@ -77,7 +76,7 @@ where
     Op::RustArgs: SpacedInput<Op> + Send,
 {
     let test_name = format!("mp_extensive_{}", Op::NAME);
-    let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GEN_KIND);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, BASIS, GeneratorKind::Spaced).extensive(true);
     let skip = skip_extensive_test(&ctx);
 
     let runner = move || {

From a121a80d2a915e24ff2ca68e29eca6675b881fea Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 29 May 2025 21:22:47 +0000
Subject: [PATCH 044/133] ci: Allow for multiple icount benchmarks in the same
 run

We don't actually need this for now, but eventually it would be nice to
run icount benchmarks on multiple targets. Start tagging artifact names
with the architecture, and allow passing `--tag` to `ci-util.py` in
order to retrieve the correct one.
---
 .github/workflows/main.yaml | 12 ++++++++++--
 ci/bench-icount.sh          | 16 ++++++++++++++--
 ci/ci-util.py               | 17 +++++++++++++----
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 8e89cb472..9f389d8b4 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -195,8 +195,14 @@ jobs:
 
   benchmarks:
     name: Benchmarks
-    runs-on: ubuntu-24.04
     timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+        - target: x86_64-unknown-linux-gnu
+          os: ubuntu-24.04
+    runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@master
       with:
@@ -215,12 +221,14 @@ jobs:
         cargo binstall -y iai-callgrind-runner --version "$iai_version"
         sudo apt-get install valgrind
     - uses: Swatinem/rust-cache@v2
+      with:
+        key: ${{ matrix.target }}
 
     - name: Run icount benchmarks
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         PR_NUMBER: ${{ github.event.pull_request.number }}
-      run: ./ci/bench-icount.sh
+      run: ./ci/bench-icount.sh ${{ matrix.target }}
 
     - name: Upload the benchmark baseline
       uses: actions/upload-artifact@v4
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 4d93e257a..5b6974fe4 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -2,10 +2,21 @@
 
 set -eux
 
+target="${1:-}"
+
+if [ -z "$target" ]; then
+    host_target=$(rustc -vV | awk '/^host/ { print $2 }')
+    echo "Defaulted to host target $host_target"
+    target="$host_target"
+fi
+
 iai_home="iai-home"
 
+# Use the arch as a tag to disambiguate artifacts
+tag="$(echo "$target" | cut -d'-' -f1)"
+
 # Download the baseline from master
-./ci/ci-util.py locate-baseline --download --extract
+./ci/ci-util.py locate-baseline --download --extract --tag "$tag"
 
 # Run benchmarks once
 function run_icount_benchmarks() {
@@ -44,6 +55,7 @@ function run_icount_benchmarks() {
         # If this is for a pull request, ignore regressions if specified.
         ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
     else
+        # Disregard regressions after merge
         ./ci/ci-util.py check-regressions --home "$iai_home" || true
     fi
 }
@@ -53,6 +65,6 @@ run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
 run_icount_benchmarks -- --save-baseline=hardfloat
 
 # Name and tar the new baseline
-name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
 echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
 tar cJf "$name.tar.xz" "$iai_home"
diff --git a/ci/ci-util.py b/ci/ci-util.py
index d785b2e9e..6c8b43980 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -28,11 +28,14 @@
             Calculate a matrix of which functions had source change, print that as
             a JSON object.
 
-        locate-baseline [--download] [--extract]
+        locate-baseline [--download] [--extract] [--tag TAG]
             Locate the most recent benchmark baseline available in CI and, if flags
             specify, download and extract it. Never exits with nonzero status if
             downloading fails.
 
+            `--tag` can be specified to look for artifacts with a specific tag, such as
+            for a specific architecture.
+
             Note that `--extract` will overwrite files in `iai-home`.
 
         check-regressions [--home iai-home] [--allow-pr-override pr_number]
@@ -50,7 +53,7 @@
 GIT = ["git", "-C", REPO_ROOT]
 DEFAULT_BRANCH = "master"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
-ARTIFACT_GLOB = "baseline-icount*"
+ARTIFACT_PREFIX = "baseline-icount*"
 # Place this in a PR body to skip regression checks (must be at the start of a line).
 REGRESSION_DIRECTIVE = "ci: allow-regressions"
 # Place this in a PR body to skip extensive tests
@@ -278,6 +281,7 @@ def locate_baseline(flags: list[str]) -> None:
 
     download = False
     extract = False
+    tag = ""
 
     while len(flags) > 0:
         match flags[0]:
@@ -285,6 +289,9 @@ def locate_baseline(flags: list[str]) -> None:
                 download = True
             case "--extract":
                 extract = True
+            case "--tag":
+                tag = flags[1]
+                flags = flags[1:]
             case _:
                 eprint(USAGE)
                 exit(1)
@@ -333,8 +340,10 @@ def locate_baseline(flags: list[str]) -> None:
         eprint("skipping download step")
         return
 
+    artifact_glob = f"{ARTIFACT_PREFIX}{f"-{tag}" if tag else ""}*"
+
     sp.run(
-        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
+        ["gh", "run", "download", str(job_id), f"--pattern={artifact_glob}"],
         check=False,
     )
 
@@ -344,7 +353,7 @@ def locate_baseline(flags: list[str]) -> None:
 
     # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
     # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
-    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
+    candidate_baselines = glob(f"{artifact_glob}/{artifact_glob}")
     if len(candidate_baselines) == 0:
         eprint("no possible baseline directories found")
         return

From b6e15ef6c9c250f29a87d08ab1a62c1374558fe5 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 29 May 2025 22:08:24 +0000
Subject: [PATCH 045/133] chore: release

---
 compiler-builtins/CHANGELOG.md | 15 +++++++++++++++
 compiler-builtins/Cargo.toml   |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/compiler-builtins/CHANGELOG.md b/compiler-builtins/CHANGELOG.md
index a7c01c463..880e56c44 100644
--- a/compiler-builtins/CHANGELOG.md
+++ b/compiler-builtins/CHANGELOG.md
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.160](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.159...compiler_builtins-v0.1.160) - 2025-05-29
+
+### Other
+
+- Change `compiler-builtins` to edition 2024
+- Remove unneeded C symbols
+- Reuse `libm`'s `Cast` and `CastFrom` in `compiler-builtins`
+- Reuse `MinInt` and `Int` from `libm` in `compiler-builtins`
+- Update `CmpResult` to use a pointer-sized return type
+- Enable `__powitf2` on MSVC
+- Fix `i256::MAX`
+- Add a note saying why we use `frintx` rather than `frintn`
+- Typo in README.md
+- Clean up unused files
+
 ## [0.1.159](https://github.com/rust-lang/compiler-builtins/compare/compiler_builtins-v0.1.158...compiler_builtins-v0.1.159) - 2025-05-12
 
 ### Other
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 93eb3e01b..8ceef286f 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
-version = "0.1.159"
+version = "0.1.160"
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"

From 81609be3b5543d9271b4ed3b3341921e0004ca1b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 1 Jun 2025 19:41:03 +0000
Subject: [PATCH 046/133] Fix new `dead_code` warnings from recent nightlies

---
 libm/src/math/support/float_traits.rs |   1 +
 libm/src/math/support/hex_float.rs    | 207 ++++++++++++++------------
 libm/src/math/support/int_traits.rs   |   1 +
 libm/src/math/support/macros.rs       |   6 +-
 libm/src/math/support/mod.rs          |   4 +-
 5 files changed, 116 insertions(+), 103 deletions(-)

diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
index 4c866ef10..dd9f46209 100644
--- a/libm/src/math/support/float_traits.rs
+++ b/libm/src/math/support/float_traits.rs
@@ -6,6 +6,7 @@ use super::int_traits::{CastFrom, Int, MinInt};
 
 /// Trait for some basic operations on floats
 // #[allow(dead_code)]
+#[allow(dead_code)] // Some constants are only used with tests
 pub trait Float:
     Copy
     + fmt::Debug
diff --git a/libm/src/math/support/hex_float.rs b/libm/src/math/support/hex_float.rs
index 85569d98a..c8558b900 100644
--- a/libm/src/math/support/hex_float.rs
+++ b/libm/src/math/support/hex_float.rs
@@ -1,8 +1,6 @@
 //! Utilities for working with hex float formats.
 
-use core::fmt;
-
-use super::{Float, Round, Status, f32_from_bits, f64_from_bits};
+use super::{Round, Status, f32_from_bits, f64_from_bits};
 
 /// Construct a 16-bit float from hex float representation (C-style)
 #[cfg(f16_enabled)]
@@ -352,133 +350,143 @@ const fn u128_ilog2(v: u128) -> u32 {
     u128::BITS - 1 - v.leading_zeros()
 }
 
-/// Format a floating point number as its IEEE hex (`%a`) representation.
-pub struct Hexf<F>(pub F);
+#[cfg(any(test, feature = "unstable-public-internals"))]
+mod hex_fmt {
+    use core::fmt;
 
-// Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
-#[cfg(not(feature = "compiler-builtins"))]
-fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    if x.is_sign_negative() {
-        write!(f, "-")?;
-    }
+    use crate::support::Float;
 
-    if x.is_nan() {
-        return write!(f, "NaN");
-    } else if x.is_infinite() {
-        return write!(f, "inf");
-    } else if *x == F::ZERO {
-        return write!(f, "0x0p+0");
-    }
+    /// Format a floating point number as its IEEE hex (`%a`) representation.
+    pub struct Hexf<F>(pub F);
 
-    let mut exponent = x.exp_unbiased();
-    let sig = x.to_bits() & F::SIG_MASK;
-
-    let bias = F::EXP_BIAS as i32;
-    // The mantissa MSB needs to be shifted up to the nearest nibble.
-    let mshift = (4 - (F::SIG_BITS % 4)) % 4;
-    let sig = sig << mshift;
-    // The width is rounded up to the nearest char (4 bits)
-    let mwidth = (F::SIG_BITS as usize + 3) / 4;
-    let leading = if exponent == -bias {
-        // subnormal number means we shift our output by 1 bit.
-        exponent += 1;
-        "0."
-    } else {
-        "1."
-    };
+    // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
+    #[cfg(not(feature = "compiler-builtins"))]
+    pub(super) fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if x.is_sign_negative() {
+            write!(f, "-")?;
+        }
 
-    write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
-}
+        if x.is_nan() {
+            return write!(f, "NaN");
+        } else if x.is_infinite() {
+            return write!(f, "inf");
+        } else if *x == F::ZERO {
+            return write!(f, "0x0p+0");
+        }
 
-#[cfg(feature = "compiler-builtins")]
-fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-    unimplemented!()
-}
+        let mut exponent = x.exp_unbiased();
+        let sig = x.to_bits() & F::SIG_MASK;
+
+        let bias = F::EXP_BIAS as i32;
+        // The mantissa MSB needs to be shifted up to the nearest nibble.
+        let mshift = (4 - (F::SIG_BITS % 4)) % 4;
+        let sig = sig << mshift;
+        // The width is rounded up to the nearest char (4 bits)
+        let mwidth = (F::SIG_BITS as usize + 3) / 4;
+        let leading = if exponent == -bias {
+            // subnormal number means we shift our output by 1 bit.
+            exponent += 1;
+            "0."
+        } else {
+            "1."
+        };
 
-impl<F: Float> fmt::LowerHex for Hexf<F> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt_any_hex(&self.0, f)
+        write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
+    }
+
+    #[cfg(feature = "compiler-builtins")]
+    pub(super) fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        unimplemented!()
+    }
+
+    impl<F: Float> fmt::LowerHex for Hexf<F> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt_any_hex(&self.0, f)
+                }
             }
         }
     }
-}
 
-impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+    impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+                }
             }
         }
     }
-}
 
-impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+    impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
+                }
             }
         }
     }
-}
 
-impl fmt::LowerHex for Hexf<i32> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(&self.0, f)
+    impl fmt::LowerHex for Hexf<i32> {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(&self.0, f)
+                }
             }
         }
     }
-}
 
-impl<T> fmt::Debug for Hexf<T>
-where
-    Hexf<T>: fmt::LowerHex,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(self, f)
+    impl<T> fmt::Debug for Hexf<T>
+    where
+        Hexf<T>: fmt::LowerHex,
+    {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(self, f)
+                }
             }
         }
     }
-}
 
-impl<T> fmt::Display for Hexf<T>
-where
-    Hexf<T>: fmt::LowerHex,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        cfg_if! {
-            if #[cfg(feature = "compiler-builtins")] {
-                let _ = f;
-                unimplemented!()
-            } else {
-                fmt::LowerHex::fmt(self, f)
+    impl<T> fmt::Display for Hexf<T>
+    where
+        Hexf<T>: fmt::LowerHex,
+    {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            cfg_if! {
+                if #[cfg(feature = "compiler-builtins")] {
+                    let _ = f;
+                    unimplemented!()
+                } else {
+                    fmt::LowerHex::fmt(self, f)
+                }
             }
         }
     }
 }
 
+#[cfg(any(test, feature = "unstable-public-internals"))]
+pub use hex_fmt::*;
+
 #[cfg(test)]
 mod parse_tests {
     extern crate std;
@@ -1064,6 +1072,7 @@ mod print_tests {
     use std::string::ToString;
 
     use super::*;
+    use crate::support::Float;
 
     #[test]
     #[cfg(f16_enabled)]
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index 716af748a..9b29e2f45 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -1,6 +1,7 @@
 use core::{cmp, fmt, ops};
 
 /// Minimal integer implementations needed on all integer types, including wide integers.
+#[allow(dead_code)] // Some constants are only used with tests
 pub trait MinInt:
     Copy
     + fmt::Debug
diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs
index 0b72db0e4..2b8fd580a 100644
--- a/libm/src/math/support/macros.rs
+++ b/libm/src/math/support/macros.rs
@@ -137,12 +137,12 @@ macro_rules! hf128 {
 #[cfg(test)]
 macro_rules! assert_biteq {
     ($left:expr, $right:expr, $($tt:tt)*) => {{
-        use $crate::support::Int;
         let l = $left;
         let r = $right;
-        let bits = Int::leading_zeros(l.to_bits() - l.to_bits()); // hack to get the width from the value
+        // hack to get width from a value
+        let bits = $crate::support::Int::leading_zeros(l.to_bits() - l.to_bits());
         assert!(
-            l.biteq(r),
+            $crate::support::Float::biteq(l, r),
             "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
             format_args!($($tt)*),
             lb = l.to_bits(),
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index 2771cfd32..2e7edd03c 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -17,6 +17,8 @@ pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
 pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
+#[cfg(any(test, feature = "unstable-public-internals"))]
+pub use hex_float::Hexf;
 #[cfg(f16_enabled)]
 #[allow(unused_imports)]
 pub use hex_float::hf16;
@@ -24,7 +26,7 @@ pub use hex_float::hf16;
 #[allow(unused_imports)]
 pub use hex_float::hf128;
 #[allow(unused_imports)]
-pub use hex_float::{Hexf, hf32, hf64};
+pub use hex_float::{hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
 
 /// Hint to the compiler that the current path is cold.

From 7c12df1bde234d8d7c8245f75bdcb8a18592f3d8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 1 Jun 2025 19:22:42 +0000
Subject: [PATCH 047/133] Upgrade all dependencies to the latest available
 version

In particular, this includes a fix to `iai-callgrind` that will allow us
to simplify our benchmark runner.
---
 builtins-test/Cargo.toml        |  8 ++++----
 compiler-builtins/Cargo.toml    |  4 ++--
 crates/libm-macros/Cargo.toml   |  4 ++--
 crates/musl-math-sys/Cargo.toml |  2 +-
 libm-test/Cargo.toml            | 14 +++++++-------
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index 10978c0bb..c7742aa24 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -10,11 +10,11 @@ license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential
 # problems with system RNGs on the variety of platforms this crate is tested on.
 # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
-rand_xoshiro = "0.6"
+rand_xoshiro = "0.7"
 # To compare float builtins against
-rustc_apfloat = "0.2.1"
+rustc_apfloat = "0.2.2"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.14.0", optional = true }
+iai-callgrind = { version = "0.14.1", optional = true }
 
 [dependencies.compiler_builtins]
 path = "../compiler-builtins"
@@ -22,7 +22,7 @@ default-features = false
 features = ["unstable-public-internals"]
 
 [dev-dependencies]
-criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] }
 paste = "1.0.15"
 
 [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 8ceef286f..6bee8da68 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -19,10 +19,10 @@ test = false
 [dependencies]
 # For more information on this dependency see
 # https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core
-core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
+core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core" }
 
 [build-dependencies]
-cc = { optional = true, version = "1.0" }
+cc = { optional = true, version = "1.2" }
 
 [dev-dependencies]
 panic-handler = { path = "../crates/panic-handler" }
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 3929854f0..6bbf47784 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -10,9 +10,9 @@ proc-macro = true
 
 [dependencies]
 heck = "0.5.0"
-proc-macro2 = "1.0.94"
+proc-macro2 = "1.0.95"
 quote = "1.0.40"
-syn = { version = "2.0.100", features = ["full", "extra-traits", "visit-mut"] }
+syn = { version = "2.0.101", features = ["full", "extra-traits", "visit-mut"] }
 
 [lints.rust]
 # Values used during testing
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index d3fb147e5..3b8811734 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -11,4 +11,4 @@ license = "MIT OR Apache-2.0"
 libm = { path = "../../libm" }
 
 [build-dependencies]
-cc = "1.2.16"
+cc = "1.2.25"
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index 7a306e735..01b45716b 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -28,28 +28,28 @@ icount = ["dep:iai-callgrind"]
 short-benchmarks = []
 
 [dependencies]
-anyhow = "1.0.97"
+anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
-gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false }
-iai-callgrind = { version = "0.14.0", optional = true }
+gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
+iai-callgrind = { version = "0.14.1", optional = true }
 indicatif = { version = "0.17.11", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
 musl-math-sys = { path = "../crates/musl-math-sys", optional = true }
 paste = "1.0.15"
-rand = "0.9.0"
+rand = "0.9.1"
 rand_chacha = "0.9.0"
 rayon = "1.10.0"
 rug = { version = "1.27.0", optional = true, default-features = false, features = ["float", "integer", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
-getrandom = { version = "0.3.2", features = ["wasm_js"] }
+getrandom = { version = "0.3.3", features = ["wasm_js"] }
 
 [build-dependencies]
-rand = { version = "0.9.0", optional = true }
+rand = { version = "0.9.1", optional = true }
 
 [dev-dependencies]
-criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+criterion = { version = "0.6.0", default-features = false, features = ["cargo_bench_support"] }
 libtest-mimic = "0.8.1"
 
 [[bench]]

From 4f943d42831c344bbc91851f646d99e4f73b9b32 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 2 Jun 2025 16:10:49 +0000
Subject: [PATCH 048/133] cleanup: Use `x.biteq(y)` rather than `x.to_bits() ==
 y.to_bits()`

---
 libm-test/src/precision.rs            |  2 +-
 libm-test/src/test_traits.rs          |  5 +----
 libm/src/math/generic/fmaximum.rs     |  2 +-
 libm/src/math/generic/fmaximum_num.rs | 11 +++++------
 libm/src/math/generic/fminimum.rs     |  2 +-
 libm/src/math/generic/fminimum_num.rs | 11 +++++------
 6 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index f5fb5f670..f6cdd015a 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -381,7 +381,7 @@ fn unop_common<F1: Float, F2: Float>(
         }
 
         // abs and copysign require signaling NaNs to be propagated, so verify bit equality.
-        if actual.to_bits() == expected.to_bits() {
+        if actual.biteq(expected) {
             return CheckAction::Custom(Ok(()));
         } else {
             return CheckAction::Custom(Err(anyhow::anyhow!("NaNs have different bitpatterns")));
diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs
index dbb970161..2af6af60b 100644
--- a/libm-test/src/test_traits.rs
+++ b/libm-test/src/test_traits.rs
@@ -328,10 +328,7 @@ where
         // Check when both are NaNs
         if actual.is_nan() && expected.is_nan() {
             if require_biteq && ctx.basis == CheckBasis::None {
-                ensure!(
-                    actual.to_bits() == expected.to_bits(),
-                    "mismatched NaN bitpatterns"
-                );
+                ensure!(actual.biteq(expected), "mismatched NaN bitpatterns");
             }
             // By default, NaNs have nothing special to check.
             return Ok(());
diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
index 4b6295bc0..898828b80 100644
--- a/libm/src/math/generic/fmaximum.rs
+++ b/libm/src/math/generic/fmaximum.rs
@@ -17,7 +17,7 @@ pub fn fmaximum<F: Float>(x: F, y: F) -> F {
         x
     } else if y.is_nan() {
         y
-    } else if x > y || (y.to_bits() == F::NEG_ZERO.to_bits() && x.is_sign_positive()) {
+    } else if x > y || (y.biteq(F::NEG_ZERO) && x.is_sign_positive()) {
         x
     } else {
         y
diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
index 2e97ff6d3..05df6cbd4 100644
--- a/libm/src/math/generic/fmaximum_num.rs
+++ b/libm/src/math/generic/fmaximum_num.rs
@@ -15,12 +15,11 @@ use crate::support::Float;
 
 #[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
-    let res =
-        if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
-            y
-        } else {
-            x
-        };
+    let res = if x.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
+        y
+    } else {
+        x
+    };
 
     // Canonicalize
     res * F::ONE
diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
index 9dc0b64be..8592ac546 100644
--- a/libm/src/math/generic/fminimum.rs
+++ b/libm/src/math/generic/fminimum.rs
@@ -17,7 +17,7 @@ pub fn fminimum<F: Float>(x: F, y: F) -> F {
         x
     } else if y.is_nan() {
         y
-    } else if x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
+    } else if x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
         x
     } else {
         y
diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
index 40db8b189..6777bbf87 100644
--- a/libm/src/math/generic/fminimum_num.rs
+++ b/libm/src/math/generic/fminimum_num.rs
@@ -15,12 +15,11 @@ use crate::support::Float;
 
 #[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
-    let res =
-        if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
-            x
-        } else {
-            y
-        };
+    let res = if y.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
+        x
+    } else {
+        y
+    };
 
     // Canonicalize
     res * F::ONE

From e211ac653fda4e36a4c0f3b71b9fd9643311cabb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 1 Jun 2025 19:52:57 +0000
Subject: [PATCH 049/133] ci: Refactor benchmark regression checks

iai-callgrind now correctly exits with error if regressions were found
[1], so we no longer need to check for regressions manually. Remove this
check and instead exit based on the exit status of the benchmark run.

[1] https://github.com/iai-callgrind/iai-callgrind/issues/337
---
 ci/bench-icount.sh | 19 ++++++-----
 ci/ci-util.py      | 84 +++++++++++-----------------------------------
 2 files changed, 29 insertions(+), 74 deletions(-)

diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 5b6974fe4..5724955fe 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -46,17 +46,18 @@ function run_icount_benchmarks() {
         shift
     done
 
-    # Run iai-callgrind benchmarks
-    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"
+    # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to
+    # capture rather than exit on error.
+    (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true
+    exit_code="$?"
 
-    # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
-    # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-    if [ -n "${PR_NUMBER:-}" ]; then
-        # If this is for a pull request, ignore regressions if specified.
-        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
-    else
+    if [ "$exit_code" -eq 0 ]; then
+        echo "Benchmarks completed with no regressions"
+    elif [ -z "${PR_NUMBER:-}" ]; then
         # Disregard regressions after merge
-        ./ci/ci-util.py check-regressions --home "$iai_home" || true
+        echo "Benchmarks completed with regressions; ignoring (not in a PR)"
+    else
+        ./ci/ci-util.py handle-banch-regressions "$PR_NUMBER"
     fi
 }
 
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 6c8b43980..3437d304f 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -11,7 +11,7 @@
 import subprocess as sp
 import sys
 from dataclasses import dataclass
-from glob import glob, iglob
+from glob import glob
 from inspect import cleandoc
 from os import getenv
 from pathlib import Path
@@ -38,14 +38,10 @@
 
             Note that `--extract` will overwrite files in `iai-home`.
 
-        check-regressions [--home iai-home] [--allow-pr-override pr_number]
-            Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
-            files and see if there are any regressions. This is used as a workaround
-            for `iai-callgrind` not exiting with error status; see
-            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
-
-            If `--allow-pr-override` is specified, the regression check will not exit
-            with failure if any line in the PR starts with `allow-regressions`.
+        handle-bench-regressions PR_NUMBER
+            Exit with success if the pull request contains a line starting with
+            `ci: allow-regressions`, indicating that regressions in benchmarks should
+            be accepted. Otherwise, exit 1.
     """
 )
 
@@ -365,64 +361,22 @@ def locate_baseline(flags: list[str]) -> None:
     eprint("baseline extracted successfully")
 
 
-def check_iai_regressions(args: list[str]):
-    """Find regressions in iai summary.json files, exit with failure if any are
-    found.
-    """
-
-    iai_home_str = "iai-home"
-    pr_number = None
-
-    while len(args) > 0:
-        match args:
-            case ["--home", home, *rest]:
-                iai_home_str = home
-                args = rest
-            case ["--allow-pr-override", pr_num, *rest]:
-                pr_number = pr_num
-                args = rest
-            case _:
-                eprint(USAGE)
-                exit(1)
-
-    iai_home = Path(iai_home_str)
-
-    found_summaries = False
-    regressions: list[dict] = []
-    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
-        found_summaries = True
-        with open(iai_home / summary_path, "r") as f:
-            summary = json.load(f)
-
-        summary_regs = []
-        run = summary["callgrind_summary"]["callgrind_run"]
-        fname = summary["function_name"]
-        id = summary["id"]
-        name_entry = {"name": f"{fname}.{id}"}
-
-        for segment in run["segments"]:
-            summary_regs.extend(segment["regressions"])
+def handle_bench_regressions(args: list[str]):
+    """Exit with error unless the PR message contains an ignore directive."""
 
-        summary_regs.extend(run["total"]["regressions"])
-
-        regressions.extend(name_entry | reg for reg in summary_regs)
-
-    if not found_summaries:
-        eprint(f"did not find any summary.json files within {iai_home}")
-        exit(1)
+    match args:
+        case [pr_number]:
+            pr_number = pr_number
+        case _:
+            eprint(USAGE)
+            exit(1)
 
-    if len(regressions) == 0:
-        eprint("No regressions found")
+    pr = PrInfo.load(pr_number)
+    if pr.contains_directive(REGRESSION_DIRECTIVE):
+        eprint("PR allows regressions")
         return
 
-    eprint("Found regressions:", json.dumps(regressions, indent=4))
-
-    if pr_number is not None:
-        pr = PrInfo.load(pr_number)
-        if pr.contains_directive(REGRESSION_DIRECTIVE):
-            eprint("PR allows regressions, returning")
-            return
-
+    eprint("Regressions were found; benchmark failed")
     exit(1)
 
 
@@ -433,8 +387,8 @@ def main():
             ctx.emit_workflow_output()
         case ["locate-baseline", *flags]:
             locate_baseline(flags)
-        case ["check-regressions", *args]:
-            check_iai_regressions(args)
+        case ["handle-bench-regressions", *args]:
+            handle_bench_regressions(args)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()

From da8433db2382d76d646bf86b2719c07d24e487ac Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 2 Jun 2025 20:20:23 +0000
Subject: [PATCH 050/133] libm-test: Fix unintentional skips in `binop_common`

`binop_common` emits a `SKIP` that is intended to apply only to
`copysign`, but is instead applying to all binary operators. Correct the
general case but leave the currently-failing `maximum_num` tests as a
FIXME, to be resolved separately in [1].

Also simplify skip logic and NaN checking, and add a few more `copysign`
checks.

[1]: https://github.com/rust-lang/compiler-builtins/pull/939
---
 libm-test/src/generate/edge_cases.rs |  1 +
 libm-test/src/precision.rs           | 15 ++++++++++-----
 libm-test/src/test_traits.rs         | 20 ++++++++++++++------
 libm/src/math/copysign.rs            | 10 +++++++++-
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/libm-test/src/generate/edge_cases.rs b/libm-test/src/generate/edge_cases.rs
index 2fb074638..4e4a782a1 100644
--- a/libm-test/src/generate/edge_cases.rs
+++ b/libm-test/src/generate/edge_cases.rs
@@ -51,6 +51,7 @@ where
 
     // Check some special values that aren't included in the above ranges
     values.push(Op::FTy::NAN);
+    values.push(Op::FTy::NEG_NAN);
     values.extend(Op::FTy::consts().iter());
 
     // Check around the maximum subnormal value
diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index f6cdd015a..32825b15d 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -444,13 +444,18 @@ fn binop_common<F1: Float, F2: Float>(
     expected: F2,
     ctx: &CheckCtx,
 ) -> CheckAction {
-    // MPFR only has one NaN bitpattern; allow the default `.is_nan()` checks to validate. Skip if
-    // the first input (magnitude source) is NaN and the output is also a NaN, or if the second
-    // input (sign source) is NaN.
-    if ctx.basis == CheckBasis::Mpfr
+    // MPFR only has one NaN bitpattern; skip tests in cases where the first argument would take
+    // the sign of a NaN second argument. The default NaN checks cover other cases.
+    if ctx.base_name == BaseName::Copysign && ctx.basis == CheckBasis::Mpfr && input.1.is_nan() {
+        return SKIP;
+    }
+
+    // FIXME(#939): this should not be skipped, there is a bug in our implementationi.
+    if ctx.base_name == BaseName::FmaximumNum
+        && ctx.basis == CheckBasis::Mpfr
         && ((input.0.is_nan() && actual.is_nan() && expected.is_nan()) || input.1.is_nan())
     {
-        return SKIP;
+        return XFAIL_NOCHECK;
     }
 
     /* FIXME(#439): our fmin and fmax do not compare signed zeros */
diff --git a/libm-test/src/test_traits.rs b/libm-test/src/test_traits.rs
index 2af6af60b..278274d91 100644
--- a/libm-test/src/test_traits.rs
+++ b/libm-test/src/test_traits.rs
@@ -312,12 +312,9 @@ where
     let mut inner = || -> TestResult {
         let mut allowed_ulp = ctx.ulp;
 
-        // Forbid overrides if the items came from an explicit list, as long as we are checking
-        // against either MPFR or the result itself.
-        let require_biteq = ctx.gen_kind == GeneratorKind::List && ctx.basis != CheckBasis::Musl;
-
         match SpecialCase::check_float(input, actual, expected, ctx) {
-            _ if require_biteq => (),
+            // Forbid overrides if the items came from an explicit list
+            _ if ctx.gen_kind == GeneratorKind::List => (),
             CheckAction::AssertSuccess => (),
             CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
             CheckAction::Custom(res) => return res,
@@ -327,9 +324,20 @@ where
 
         // Check when both are NaNs
         if actual.is_nan() && expected.is_nan() {
-            if require_biteq && ctx.basis == CheckBasis::None {
+            // Don't assert NaN bitwise equality if:
+            //
+            // * Testing against MPFR (there is a single NaN representation)
+            // * Testing against Musl except for explicit tests (Musl does some NaN quieting)
+            //
+            // In these cases, just the check that actual and expected are both NaNs is
+            // sufficient.
+            let skip_nan_biteq = ctx.basis == CheckBasis::Mpfr
+                || (ctx.basis == CheckBasis::Musl && ctx.gen_kind != GeneratorKind::List);
+
+            if !skip_nan_biteq {
                 ensure!(actual.biteq(expected), "mismatched NaN bitpatterns");
             }
+
             // By default, NaNs have nothing special to check.
             return Ok(());
         } else if actual.is_nan() || expected.is_nan() {
diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs
index d2a86e7fd..d093d6107 100644
--- a/libm/src/math/copysign.rs
+++ b/libm/src/math/copysign.rs
@@ -59,9 +59,17 @@ mod tests {
 
         // Not required but we expect it
         assert_biteq!(f(F::NAN, F::NAN), F::NAN);
-        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NAN, F::ONE), F::NAN);
+        assert_biteq!(f(F::NAN, F::NEG_ONE), F::NEG_NAN);
         assert_biteq!(f(F::NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NAN), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::ONE), F::NAN);
+        assert_biteq!(f(F::NEG_NAN, F::NEG_ONE), F::NEG_NAN);
         assert_biteq!(f(F::NEG_NAN, F::NEG_NAN), F::NEG_NAN);
+        assert_biteq!(f(F::ONE, F::NAN), F::ONE);
+        assert_biteq!(f(F::ONE, F::NEG_NAN), F::NEG_ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NAN), F::ONE);
+        assert_biteq!(f(F::NEG_ONE, F::NEG_NAN), F::NEG_ONE);
     }
 
     #[test]

From 3c30d8cb1ec24e0b8a88a5cedcf6b9bece0117d7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 20 May 2025 13:31:31 +0000
Subject: [PATCH 051/133] compiler-builtins: Eliminate symlinks

compiler-builtins has a symlink to the `libm` source directory so the
two crates can share files but still act as two separate crates. This
causes problems with some sysroot-related tooling, however, since
directory symlinks seem to not be supported.

The reason this was a symlink in the first place is that there isn't an
easy way for Cargo to publish two crates that share source (building works
fine but publishing rejects `include`d files from parent directories, as
well as nested package roots). However, after the switch to a subtree,
we no longer need to publish compiler-builtins; this means that we can
eliminate the link and just use `#[path]`.

Similarly, the LICENSE file was symlinked so it could live in the
repository root but be included in the package. This is also removed as
it caused problems with the dist job (error from bootstrap's
`tarball.rs`, "generated a symlink in a tarball").

If we need to publish compiler-builtins again for any reason, it would
be easy to revert these changes in a preprocess step.
---
 compiler-builtins/LICENSE.txt        | 1 -
 compiler-builtins/src/math/libm_math | 1 -
 compiler-builtins/src/math/mod.rs    | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)
 delete mode 120000 compiler-builtins/LICENSE.txt
 delete mode 120000 compiler-builtins/src/math/libm_math

diff --git a/compiler-builtins/LICENSE.txt b/compiler-builtins/LICENSE.txt
deleted file mode 120000
index 4ab43736a..000000000
--- a/compiler-builtins/LICENSE.txt
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE.txt
\ No newline at end of file
diff --git a/compiler-builtins/src/math/libm_math b/compiler-builtins/src/math/libm_math
deleted file mode 120000
index 4d65313c2..000000000
--- a/compiler-builtins/src/math/libm_math
+++ /dev/null
@@ -1 +0,0 @@
-../../../libm/src/math
\ No newline at end of file
diff --git a/compiler-builtins/src/math/mod.rs b/compiler-builtins/src/math/mod.rs
index 078feb9ff..62d729674 100644
--- a/compiler-builtins/src/math/mod.rs
+++ b/compiler-builtins/src/math/mod.rs
@@ -2,6 +2,7 @@
 #[allow(dead_code)]
 #[allow(unused_imports)]
 #[allow(clippy::all)]
+#[path = "../../../libm/src/math/mod.rs"]
 pub(crate) mod libm_math;
 
 macro_rules! libm_intrinsics {

From f1c4a11e96921c88bcb051caa6ea95112e60dca7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 4 Jun 2025 08:20:08 +0000
Subject: [PATCH 052/133] Replace the musl submodule with a download script

The submodule was causing issues in rust-lang/rust, so eliminate it
here. `build-musl` is also removed from `libm-test`'s default features
so the crate doesn't need to be built by default.
---
 .github/workflows/main.yaml   | 22 ++++++++++------------
 .gitignore                    |  3 +++
 .gitmodules                   |  4 ----
 ci/update-musl.sh             | 15 +++++++++++++++
 crates/musl-math-sys/build.rs |  2 +-
 crates/musl-math-sys/musl     |  1 -
 libm-test/Cargo.toml          |  2 +-
 7 files changed, 30 insertions(+), 19 deletions(-)
 delete mode 100644 .gitmodules
 create mode 100755 ci/update-musl.sh
 delete mode 160000 crates/musl-math-sys/musl

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 9f389d8b4..95b0962b0 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -108,8 +108,6 @@ jobs:
     - name: Print runner information
       run: uname -a
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     - name: Install Rust (rustup)
       shell: bash
       run: |
@@ -146,6 +144,10 @@ jobs:
       shell: bash
     - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV"
       shell: bash
+      
+    - name: Download musl source
+      run: ./ci/update-musl.sh
+      shell: bash
 
     - name: Verify API list
       if: matrix.os == 'ubuntu-24.04'
@@ -182,8 +184,6 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     # Unlike rustfmt, stable clippy does not work on code with nightly features.
     - name: Install nightly `clippy`
       run: |
@@ -191,6 +191,8 @@ jobs:
         rustup default nightly
         rustup component add clippy
     - uses: Swatinem/rust-cache@v2
+    - name: Download musl source
+      run: ./ci/update-musl.sh
     - run: cargo clippy --workspace --all-targets
 
   benchmarks:
@@ -205,8 +207,6 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
     - uses: actions/checkout@master
-      with:
-        submodules: true
     - uses: taiki-e/install-action@cargo-binstall
 
     - name: Set up dependencies
@@ -223,6 +223,8 @@ jobs:
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}
+    - name: Download musl source
+      run: ./ci/update-musl.sh
 
     - name: Run icount benchmarks
       env:
@@ -256,8 +258,6 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     - name: Install Rust (rustup)
       run: rustup update nightly --no-self-update && rustup default nightly
       shell: bash
@@ -292,8 +292,6 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-      with:
-        submodules: true
     - name: Install stable `rustfmt`
       run: rustup set profile minimal && rustup default stable && rustup component add rustfmt
     - run: cargo fmt -- --check
@@ -317,13 +315,13 @@ jobs:
       TO_TEST: ${{ matrix.to_test }}
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: true
       - name: Install Rust
         run: |
           rustup update nightly --no-self-update
           rustup default nightly
       - uses: Swatinem/rust-cache@v2
+      - name: download musl source
+        run: ./ci/update-musl.sh
       - name: Run extensive tests
         run: ./ci/run-extensive.sh
       - name: Print test logs if available
diff --git a/.gitignore b/.gitignore
index 5287a6c72..f12b871c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,6 @@ iai-home
 *.bk
 *.rs.bk
 .#*
+
+# Manually managed
+crates/musl-math-sys/musl
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 792ed9ab2..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,4 +0,0 @@
-[submodule "crates/musl-math-sys/musl"]
-	path = crates/musl-math-sys/musl
-	url = https://git.musl-libc.org/git/musl
-	shallow = true
diff --git a/ci/update-musl.sh b/ci/update-musl.sh
new file mode 100755
index 000000000..b71cf5778
--- /dev/null
+++ b/ci/update-musl.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Download musl to a repository for `musl-math-sys`
+
+set -eux
+
+url=git://git.musl-libc.org/musl
+ref=c47ad25ea3b484e10326f933e927c0bc8cded3da
+dst=crates/musl-math-sys/musl
+
+if ! [ -d "$dst" ]; then
+    git clone "$url" "$dst" --single-branch --depth=1000
+fi
+
+git -C "$dst" fetch "$url" --depth=1
+git -C "$dst" checkout "$ref"
diff --git a/crates/musl-math-sys/build.rs b/crates/musl-math-sys/build.rs
index b00dbc73e..59e42f2d2 100644
--- a/crates/musl-math-sys/build.rs
+++ b/crates/musl-math-sys/build.rs
@@ -120,7 +120,7 @@ fn build_musl_math(cfg: &Config) {
     let arch_dir = musl_dir.join("arch").join(&cfg.musl_arch);
     assert!(
         math.exists(),
-        "musl source not found. Is the submodule up to date?"
+        "musl source not found. You may need to run `./ci/update-musl.sh`."
     );
 
     let source_map = find_math_source(&math, cfg);
diff --git a/crates/musl-math-sys/musl b/crates/musl-math-sys/musl
deleted file mode 160000
index c47ad25ea..000000000
--- a/crates/musl-math-sys/musl
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c47ad25ea3b484e10326f933e927c0bc8cded3da
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index 01b45716b..05fcc3234 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [features]
-default = ["build-mpfr", "build-musl", "unstable-float"]
+default = ["build-mpfr", "unstable-float"]
 
 # Propagated from libm because this affects which functions we test.
 unstable-float = ["libm/unstable-float", "rug?/nightly-float"]

From 9e0cc1dbe45b552322f5512e484e2f0670c901c2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 28 May 2025 14:45:14 +0000
Subject: [PATCH 053/133] Add an empty rust-version file

This will be used by `josh` tooling.
---
 rust-version | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 rust-version

diff --git a/rust-version b/rust-version
new file mode 100644
index 000000000..e69de29bb

From ded114bca9a34d3ad4cd47f6a7287c7937c0ca38 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 18 May 2025 17:30:58 +0000
Subject: [PATCH 054/133] Add tooling for `josh` syncs

Create a crate that handles pulling from and pushing to rust-lang/rust.
This can be invoked with the following:

    $ cargo run -p josh-sync -- rustc-pull
    $ RUSTC_GIT=/path/to/rust/checkout cargo run -p josh-sync -- rustc-push <username>
---
 Cargo.toml                   |   1 +
 crates/josh-sync/Cargo.toml  |   7 +
 crates/josh-sync/src/main.rs |  45 +++++
 crates/josh-sync/src/sync.rs | 371 +++++++++++++++++++++++++++++++++++
 4 files changed, 424 insertions(+)
 create mode 100644 crates/josh-sync/Cargo.toml
 create mode 100644 crates/josh-sync/src/main.rs
 create mode 100644 crates/josh-sync/src/sync.rs

diff --git a/Cargo.toml b/Cargo.toml
index bc6b4bd29..fb638f2fb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,7 @@ resolver = "2"
 members = [
     "builtins-test",
     "compiler-builtins",
+    "crates/josh-sync",
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
diff --git a/crates/josh-sync/Cargo.toml b/crates/josh-sync/Cargo.toml
new file mode 100644
index 000000000..1f3bb376d
--- /dev/null
+++ b/crates/josh-sync/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "josh-sync"
+edition = "2024"
+publish = false
+
+[dependencies]
+directories = "6.0.0"
diff --git a/crates/josh-sync/src/main.rs b/crates/josh-sync/src/main.rs
new file mode 100644
index 000000000..7f0b11900
--- /dev/null
+++ b/crates/josh-sync/src/main.rs
@@ -0,0 +1,45 @@
+use std::io::{Read, Write};
+use std::process::exit;
+use std::{env, io};
+
+use crate::sync::{GitSync, Josh};
+
+mod sync;
+
+const USAGE: &str = r#"Utility for synchroniing compiler-builtins with rust-lang/rust
+
+Usage:
+
+    josh-sync rustc-pull
+
+        Pull from rust-lang/rust to compiler-builtins. Creates a commit
+        updating the version file, followed by a merge commit.
+
+    josh-sync rustc-push GITHUB_USERNAME [BRANCH]
+
+        Create a branch off of rust-lang/rust updating compiler-builtins.
+"#;
+
+fn main() {
+    let sync = GitSync::from_current_dir();
+
+    // Collect args, then recollect as str refs so we can match on them
+    let args: Vec<_> = env::args().collect();
+    let args: Vec<&str> = args.iter().map(String::as_str).collect();
+
+    match args.as_slice()[1..] {
+        ["rustc-pull"] => sync.rustc_pull(None),
+        ["rustc-push", github_user, branch] => sync.rustc_push(github_user, Some(branch)),
+        ["rustc-push", github_user] => sync.rustc_push(github_user, None),
+        ["start-josh"] => {
+            let _josh = Josh::start();
+            println!("press enter to stop");
+            io::stdout().flush().unwrap();
+            let _ = io::stdin().read(&mut [0u8]).unwrap();
+        }
+        _ => {
+            println!("{USAGE}");
+            exit(1);
+        }
+    }
+}
diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs
new file mode 100644
index 000000000..003cf187d
--- /dev/null
+++ b/crates/josh-sync/src/sync.rs
@@ -0,0 +1,371 @@
+use std::net::{SocketAddr, TcpStream};
+use std::process::{Command, Stdio, exit};
+use std::time::Duration;
+use std::{env, fs, process, thread};
+
+const JOSH_PORT: u16 = 42042;
+const DEFAULT_PR_BRANCH: &str = "update-builtins";
+
+pub struct GitSync {
+    upstream_repo: String,
+    upstream_ref: String,
+    upstream_url: String,
+    josh_filter: String,
+    josh_url_base: String,
+}
+
+/// This code was adapted from the miri repository, via the rustc-dev-guide
+/// (<https://github.com/rust-lang/rustc-dev-guide/tree/c51adbd12d/josh-sync>)
+impl GitSync {
+    pub fn from_current_dir() -> Self {
+        let upstream_repo =
+            env::var("UPSTREAM_ORG").unwrap_or_else(|_| "rust-lang".to_owned()) + "/rust";
+
+        Self {
+            upstream_url: format!("https://github.com/{upstream_repo}"),
+            upstream_repo,
+            upstream_ref: env::var("UPSTREAM_REF").unwrap_or_else(|_| "HEAD".to_owned()),
+            josh_filter: ":/library/compiler-builtins".to_owned(),
+            josh_url_base: format!("http://localhost:{JOSH_PORT}"),
+        }
+    }
+
+    /// Pull from rust-lang/rust to compiler-builtins.
+    pub fn rustc_pull(&self, commit: Option<String>) {
+        let Self {
+            upstream_ref,
+            upstream_url,
+            upstream_repo,
+            ..
+        } = self;
+
+        let new_upstream_base = commit.unwrap_or_else(|| {
+            let out = check_output(["git", "ls-remote", upstream_url, upstream_ref]);
+            out.split_whitespace()
+                .next()
+                .unwrap_or_else(|| panic!("could not split output: '{out}'"))
+                .to_owned()
+        });
+
+        ensure_clean();
+
+        // Make sure josh is running.
+        let _josh = Josh::start();
+        let josh_url_filtered = self.josh_url(
+            &self.upstream_repo,
+            Some(&new_upstream_base),
+            Some(&self.josh_filter),
+        );
+
+        let previous_upstream_base = fs::read_to_string("rust-version")
+            .expect("failed to read `rust-version`")
+            .trim()
+            .to_string();
+        assert_ne!(previous_upstream_base, new_upstream_base, "nothing to pull");
+
+        let orig_head = check_output(["git", "rev-parse", "HEAD"]);
+        println!("original upstream base: {previous_upstream_base}");
+        println!("new upstream base: {new_upstream_base}");
+        println!("original HEAD: {orig_head}");
+
+        // Fetch the latest upstream HEAD so we can get a summary. Use the Josh URL for caching.
+        run([
+            "git",
+            "fetch",
+            &self.josh_url(&self.upstream_repo, Some(&new_upstream_base), Some(":/")),
+            &new_upstream_base,
+            "--depth=1",
+        ]);
+        let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]);
+
+        // Update rust-version file. As a separate commit, since making it part of
+        // the merge has confused the heck out of josh in the past.
+        // We pass `--no-verify` to avoid running git hooks.
+        // We do this before the merge so that if there are merge conflicts, we have
+        // the right rust-version file while resolving them.
+        fs::write("rust-version", format!("{new_upstream_base}\n"))
+            .expect("failed to write rust-version");
+
+        let prep_message = format!(
+            "Update the upstream Rust version\n\n\
+            To prepare for merging from {upstream_repo}, set the version file to:\n\n    \
+            {new_summary}\n\
+            ",
+        );
+        run([
+            "git",
+            "commit",
+            "rust-version",
+            "--no-verify",
+            "-m",
+            &prep_message,
+        ]);
+
+        // Fetch given rustc commit.
+        run(["git", "fetch", &josh_url_filtered]);
+        let incoming_ref = check_output(["git", "rev-parse", "FETCH_HEAD"]);
+        println!("incoming ref: {incoming_ref}");
+
+        let merge_message = format!(
+            "Merge ref '{upstream_head_short}{filter}' from {upstream_url}\n\n\
+            Pull recent changes from {upstream_repo} via Josh.\n\n\
+            Upstream ref: {new_upstream_base}\n\
+            Filtered ref: {incoming_ref}\n\
+            ",
+            upstream_head_short = &new_upstream_base[..12],
+            filter = self.josh_filter
+        );
+
+        // This should not add any new root commits. So count those before and after merging.
+        let num_roots = || -> u32 {
+            let out = check_output(["git", "rev-list", "HEAD", "--max-parents=0", "--count"]);
+            out.trim()
+                .parse::<u32>()
+                .unwrap_or_else(|e| panic!("failed to parse `{out}`: {e}"))
+        };
+        let num_roots_before = num_roots();
+
+        let pre_merge_sha = check_output(["git", "rev-parse", "HEAD"]);
+        println!("pre-merge HEAD: {pre_merge_sha}");
+
+        // Merge the fetched commit.
+        run([
+            "git",
+            "merge",
+            "FETCH_HEAD",
+            "--no-verify",
+            "--no-ff",
+            "-m",
+            &merge_message,
+        ]);
+
+        let current_sha = check_output(["git", "rev-parse", "HEAD"]);
+        if current_sha == pre_merge_sha {
+            run(["git", "reset", "--hard", &orig_head]);
+            eprintln!(
+                "No merge was performed, no changes to pull were found. \
+                Rolled back the preparation commit."
+            );
+            exit(1);
+        }
+
+        // Check that the number of roots did not increase.
+        assert_eq!(
+            num_roots(),
+            num_roots_before,
+            "Josh created a new root commit. This is probably not the history you want."
+        );
+    }
+
+    /// Construct an update to rust-lang/rust from compiler-builtins.
+    pub fn rustc_push(&self, github_user: &str, branch: Option<&str>) {
+        let Self {
+            josh_filter,
+            upstream_url,
+            ..
+        } = self;
+
+        let branch = branch.unwrap_or(DEFAULT_PR_BRANCH);
+        let josh_url = self.josh_url(&format!("{github_user}/rust"), None, Some(josh_filter));
+        let user_upstream_url = format!("git@github.com:{github_user}/rust.git");
+
+        let Ok(rustc_git) = env::var("RUSTC_GIT") else {
+            panic!("the RUSTC_GIT environment variable must be set to a rust-lang/rust checkout")
+        };
+
+        ensure_clean();
+        let base = fs::read_to_string("rust-version")
+            .expect("failed to read `rust-version`")
+            .trim()
+            .to_string();
+
+        // Make sure josh is running.
+        let _josh = Josh::start();
+
+        // Prepare the branch. Pushing works much better if we use as base exactly
+        // the commit that we pulled from last time, so we use the `rust-version`
+        // file to find out which commit that would be.
+        println!("Preparing {github_user}/rust (base: {base})...");
+
+        if Command::new("git")
+            .args(["-C", &rustc_git, "fetch", &user_upstream_url, branch])
+            .output() // capture output
+            .expect("could not run fetch")
+            .status
+            .success()
+        {
+            panic!(
+                "The branch '{branch}' seems to already exist in '{user_upstream_url}'. \
+                 Please delete it and try again."
+            );
+        }
+
+        run(["git", "-C", &rustc_git, "fetch", upstream_url, &base]);
+
+        run_cfg("git", |c| {
+            c.args([
+                "-C",
+                &rustc_git,
+                "push",
+                &user_upstream_url,
+                &format!("{base}:refs/heads/{branch}"),
+            ])
+            .stdout(Stdio::null())
+            .stderr(Stdio::null()) // silence the "create GitHub PR" message
+        });
+        println!("pushed PR branch");
+
+        // Do the actual push.
+        println!("Pushing changes...");
+        run(["git", "push", &josh_url, &format!("HEAD:{branch}")]);
+        println!();
+
+        // Do a round-trip check to make sure the push worked as expected.
+        run(["git", "fetch", &josh_url, branch]);
+
+        let head = check_output(["git", "rev-parse", "HEAD"]);
+        let fetch_head = check_output(["git", "rev-parse", "FETCH_HEAD"]);
+        assert_eq!(
+            head, fetch_head,
+            "Josh created a non-roundtrip push! Do NOT merge this into rustc!\n\
+             Expected {head}, got {fetch_head}."
+        );
+        println!(
+            "Confirmed that the push round-trips back to compiler-builtins properly. Please \
+            create a rustc PR:"
+        );
+        // Open PR with `subtree update` title to silence the `no-merges` triagebot check
+        println!(
+            "    {upstream_url}/compare/{github_user}:{branch}?quick_pull=1\
+            &title=Update%20the%20%60compiler-builtins%60%20subtree\
+            &body=Update%20the%20Josh%20subtree%20to%20https%3A%2F%2Fgithub.com%2Frust-lang%2F\
+            compiler-builtins%2Fcommit%2F{head_short}.%0A%0Ar%3F%20%40ghost",
+            head_short = &head[..12],
+        );
+    }
+
+    /// Construct a url to the local Josh server with (optionally)
+    fn josh_url(&self, repo: &str, rev: Option<&str>, filter: Option<&str>) -> String {
+        format!(
+            "{base}/{repo}.git{at}{rev}{filter}{filt_git}",
+            base = self.josh_url_base,
+            at = if rev.is_some() { "@" } else { "" },
+            rev = rev.unwrap_or_default(),
+            filter = filter.unwrap_or_default(),
+            filt_git = if filter.is_some() { ".git" } else { "" }
+        )
+    }
+}
+
+/// Fail if there are files that need to be checked in.
+fn ensure_clean() {
+    let read = check_output(["git", "status", "--untracked-files=no", "--porcelain"]);
+    assert!(
+        read.is_empty(),
+        "working directory must be clean before performing rustc pull"
+    );
+}
+
+/* Helpers for running commands with logged invocations */
+
+/// Run a command from an array, passing its output through.
+fn run<'a, Args: AsRef<[&'a str]>>(l: Args) {
+    let l = l.as_ref();
+    run_cfg(l[0], |c| c.args(&l[1..]));
+}
+
+/// Run a command from an array, collecting its output.
+fn check_output<'a, Args: AsRef<[&'a str]>>(l: Args) -> String {
+    let l = l.as_ref();
+    check_output_cfg(l[0], |c| c.args(&l[1..]))
+}
+
+/// [`run`] with configuration.
+fn run_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) {
+    // self.read(l.as_ref());
+    check_output_cfg(prog, |c| f(c.stdout(Stdio::inherit())));
+}
+
+/// [`read`] with configuration. All shell helpers print the command and pass stderr.
+fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) -> String {
+    let mut cmd = Command::new(prog);
+    cmd.stderr(Stdio::inherit());
+    f(&mut cmd);
+    eprintln!("+ {cmd:?}");
+    let out = cmd.output().expect("command failed");
+    assert!(out.status.success());
+    String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output")
+}
+
+/// Create a wrapper that stops Josh on drop.
+pub struct Josh(process::Child);
+
+impl Josh {
+    pub fn start() -> Self {
+        // Determine cache directory.
+        let user_dirs =
+            directories::ProjectDirs::from("org", "rust-lang", "rustc-compiler-builtins-josh")
+                .unwrap();
+        let local_dir = user_dirs.cache_dir().to_owned();
+
+        // Start josh, silencing its output.
+        #[expect(clippy::zombie_processes, reason = "clippy can't handle the loop")]
+        let josh = process::Command::new("josh-proxy")
+            .arg("--local")
+            .arg(local_dir)
+            .args([
+                "--remote=https://github.com",
+                &format!("--port={JOSH_PORT}"),
+                "--no-background",
+            ])
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .spawn()
+            .expect("failed to start josh-proxy, make sure it is installed");
+
+        // Wait until the port is open. We try every 10ms until 1s passed.
+        for _ in 0..100 {
+            // This will generally fail immediately when the port is still closed.
+            let addr = SocketAddr::from(([127, 0, 0, 1], JOSH_PORT));
+            let josh_ready = TcpStream::connect_timeout(&addr, Duration::from_millis(1));
+
+            if josh_ready.is_ok() {
+                println!("josh up and running");
+                return Josh(josh);
+            }
+
+            // Not ready yet.
+            thread::sleep(Duration::from_millis(10));
+        }
+        panic!("Even after waiting for 1s, josh-proxy is still not available.")
+    }
+}
+
+impl Drop for Josh {
+    fn drop(&mut self) {
+        if cfg!(unix) {
+            // Try to gracefully shut it down.
+            Command::new("kill")
+                .args(["-s", "INT", &self.0.id().to_string()])
+                .output()
+                .expect("failed to SIGINT josh-proxy");
+            // Sadly there is no "wait with timeout"... so we just give it some time to finish.
+            thread::sleep(Duration::from_millis(100));
+            // Now hopefully it is gone.
+            if self
+                .0
+                .try_wait()
+                .expect("failed to wait for josh-proxy")
+                .is_some()
+            {
+                return;
+            }
+        }
+        // If that didn't work (or we're not on Unix), kill it hard.
+        eprintln!(
+            "I have to kill josh-proxy the hard way, let's hope this does not \
+            break anything."
+        );
+        self.0.kill().expect("failed to SIGKILL josh-proxy");
+    }
+}

From 162576fa9844ec5111191e32a3384a26f8c825fb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 4 Jun 2025 18:10:07 +0000
Subject: [PATCH 055/133] Update the upstream Rust version

To prepare for merging from rust-lang/rust, set the version file to:

    df8102fe5f Auto merge of #142002 - onur-ozkan:follow-ups2, r=jieyouxu
---
 rust-version | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rust-version b/rust-version
index e69de29bb..e05aaa057 100644
--- a/rust-version
+++ b/rust-version
@@ -0,0 +1 @@
+df8102fe5f24f28a918660b0cd918d7331c3896e

From cd0f2026a1995c0314aea6b52ed30b9e050931d0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 4 Jun 2025 21:17:51 +0000
Subject: [PATCH 056/133] `panic-handler`: Remove the `no_core` feature

This was introduced before `#[panic_handler]` was stable, but should no
longer be needed. Additionally, we only need it for
`builtins-test-intrinsics`, not as a dependency of `compiler-builtins`.
---
 builtins-test-intrinsics/Cargo.toml | 2 +-
 compiler-builtins/Cargo.toml        | 3 ---
 crates/panic-handler/src/lib.rs     | 7 ++-----
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml
index 704de20c5..064b7cad2 100644
--- a/builtins-test-intrinsics/Cargo.toml
+++ b/builtins-test-intrinsics/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"]}
+compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"] }
 panic-handler = { path = "../crates/panic-handler" }
 
 [features]
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 6bee8da68..11ee91954 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -24,9 +24,6 @@ core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core
 [build-dependencies]
 cc = { optional = true, version = "1.2" }
 
-[dev-dependencies]
-panic-handler = { path = "../crates/panic-handler" }
-
 [features]
 default = ["compiler-builtins"]
 
diff --git a/crates/panic-handler/src/lib.rs b/crates/panic-handler/src/lib.rs
index 673e00522..f4d7c8397 100644
--- a/crates/panic-handler/src/lib.rs
+++ b/crates/panic-handler/src/lib.rs
@@ -1,11 +1,8 @@
 //! This is needed for tests on targets that require a `#[panic_handler]` function
 
-#![feature(no_core)]
-#![no_core]
-
-extern crate core;
+#![no_std]
 
 #[panic_handler]
-fn panic(_: &core::panic::PanicInfo) -> ! {
+fn panic(_: &core::panic::PanicInfo<'_>) -> ! {
     loop {}
 }

From 23567698971accae711fa3514f95c996ecba2abf Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 4 Jun 2025 20:56:35 +0000
Subject: [PATCH 057/133] Use the in-tree `compiler-builtins`

Many of `std`'s dependencies have a dependency on the crates.io
`compiler-builtins` when used with the feature
`rustc-std-workspace-core`. Use a Cargo patch to select the in-tree
version instead.

`compiler-builtins` is also added as a dependency of
`rustc-std-workspace-core` so these crates can remove their crates.io
dependency in the future.
---
 compiler-builtins/Cargo.toml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 11ee91954..df8e96482 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -17,9 +17,7 @@ doctest = false
 test = false
 
 [dependencies]
-# For more information on this dependency see
-# https://github.com/rust-lang/rust/tree/master/library/rustc-std-workspace-core
-core = { version = "1.0.1", optional = true, package = "rustc-std-workspace-core" }
+core = { path = "../../core", optional = true }
 
 [build-dependencies]
 cc = { optional = true, version = "1.2" }

From 8f802ae454432a8cd30df5d376dbd29694e6bb71 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 5 Jun 2025 06:00:54 +0000
Subject: [PATCH 058/133] compiler-builtins: Fix a `rustdoc::bare-urls` error

---
 compiler-builtins/src/aarch64_linux.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs
index e238d0237..226121237 100644
--- a/compiler-builtins/src/aarch64_linux.rs
+++ b/compiler-builtins/src/aarch64_linux.rs
@@ -4,7 +4,7 @@
 //! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
 //! where atomic operations call into the compiler runtime to dispatch between two depending on
 //! which is supported on the current CPU.
-//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion.
+//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
 //!
 //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
 //! Use the `compiler-rt` intrinsics if you want LSE support.

From 714314f48b9c0b23c083b0840930f6b000b9cc2a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 8 Jun 2025 10:21:57 +0000
Subject: [PATCH 059/133] compiler-builtins: Resolve `unsafe_op_in_unsafe_fn`
 on Arm32 Android

There are a few places that violate this lint, which showed up in
rust-lang/rust CI (the relevant module is gated behind
`kernel_user_helpers` which is only set for `armv4t`, `armv5te`, and
`arm-linux-androideabi`; none of these are tested in compiler-builtins
CI). Add new `unsafe { /* ... */ }` blocks where needed to address this.

Some blocks should get a more thorough review of their preconditions, so
their safety comments are left as `FIXME`s.
---
 compiler-builtins/src/arm_linux.rs | 40 ++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/compiler-builtins/src/arm_linux.rs b/compiler-builtins/src/arm_linux.rs
index 6ce67ba71..ab9f86807 100644
--- a/compiler-builtins/src/arm_linux.rs
+++ b/compiler-builtins/src/arm_linux.rs
@@ -4,12 +4,17 @@ use core::{arch, mem};
 // Kernel-provided user-mode helper functions:
 // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
 unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool {
-    let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ());
+    // FIXME(volatile): the third parameter is a volatile pointer
+    // SAFETY: kernel docs specify a known address with the given signature
+    let f = unsafe {
+        mem::transmute::<_, extern "C" fn(u32, u32, *mut u32) -> u32>(0xffff0fc0usize as *const ())
+    };
     f(oldval, newval, ptr) == 0
 }
 
 unsafe fn __kuser_memory_barrier() {
-    let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ());
+    // SAFETY: kernel docs specify a known address with the given signature
+    let f = unsafe { mem::transmute::<_, extern "C" fn()>(0xffff0fa0usize as *const ()) };
     f();
 }
 
@@ -67,8 +72,10 @@ fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
 /// - if `size_of::<T>() == 2`, `ptr` or `ptr` offset by 2 bytes must be valid for a relaxed atomic
 ///   read of 2 bytes.
 /// - if `size_of::<T>() == 4`, `ptr` must be valid for a relaxed atomic read of 4 bytes.
+// FIXME: assert some of the preconditions in debug mode
 unsafe fn atomic_load_aligned<T>(ptr: *mut u32) -> u32 {
-    if mem::size_of::<T>() == 4 {
+    const { assert!(size_of::<T>() <= 4) };
+    if size_of::<T>() == 4 {
         // SAFETY: As `T` has a size of 4, the caller garantees this is sound.
         unsafe { AtomicU32::from_ptr(ptr).load(Ordering::Relaxed) }
     } else {
@@ -100,11 +107,13 @@ unsafe fn atomic_rmw<T, F: Fn(u32) -> u32, G: Fn(u32, u32) -> u32>(ptr: *mut T,
     let (shift, mask) = get_shift_mask(ptr);
 
     loop {
-        let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
+        // FIXME(safety): preconditions review needed
+        let curval_aligned = unsafe { atomic_load_aligned::<T>(aligned_ptr) };
         let curval = extract_aligned(curval_aligned, shift, mask);
         let newval = f(curval);
         let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
-        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
+        // FIXME(safety): preconditions review needed
+        if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } {
             return g(curval, newval);
         }
     }
@@ -116,13 +125,15 @@ unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 {
     let (shift, mask) = get_shift_mask(ptr);
 
     loop {
-        let curval_aligned = atomic_load_aligned::<T>(aligned_ptr);
+        // FIXME(safety): preconditions review needed
+        let curval_aligned = unsafe { atomic_load_aligned::<T>(aligned_ptr) };
         let curval = extract_aligned(curval_aligned, shift, mask);
         if curval != oldval {
             return curval;
         }
         let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
-        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
+        // FIXME(safety): preconditions review needed
+        if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } {
             return oldval;
         }
     }
@@ -132,7 +143,14 @@ macro_rules! atomic_rmw {
     ($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
         intrinsics! {
             pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
-                atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty
+                // FIXME(safety): preconditions review needed
+                unsafe {
+                    atomic_rmw(
+                        ptr,
+                        |x| $op(x as $ty, val) as u32,
+                        |old, new| $fetch(old, new)
+                    ) as $ty
+                }
             }
         }
     };
@@ -149,7 +167,8 @@ macro_rules! atomic_cmpxchg {
     ($name:ident, $ty:ty) => {
         intrinsics! {
             pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
-                atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty
+                // FIXME(safety): preconditions review needed
+                unsafe { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty }
             }
         }
     };
@@ -285,6 +304,7 @@ atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
 
 intrinsics! {
     pub unsafe extern "C" fn __sync_synchronize() {
-        __kuser_memory_barrier();
+       // SAFETY: preconditions are the same as the calling function.
+       unsafe {  __kuser_memory_barrier() };
     }
 }

From d17f101f0d5eba57120ad60358c478442c284303 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 8 Jun 2025 17:13:18 +0000
Subject: [PATCH 060/133] compiler-builtins: Specify `:r` registers for `usize`

On the ILP32 `x86_64-unknown-linux-gnux32` target, `usize` is 32 bits so
there is a sub-register alignment warning. Specify the 64-bit `r`
registers, which matches the current default as well as the size of the
other operands in the routines.
---
 compiler-builtins/src/mem/x86_64.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler-builtins/src/mem/x86_64.rs b/compiler-builtins/src/mem/x86_64.rs
index 5cbe83ab1..fb29eb11b 100644
--- a/compiler-builtins/src/mem/x86_64.rs
+++ b/compiler-builtins/src/mem/x86_64.rs
@@ -69,7 +69,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
         "rep movsb",
         "sub $7, %rsi",
         "sub $7, %rdi",
-        "mov {qword_count}, %rcx",
+        "mov {qword_count:r}, %rcx",
         "rep movsq",
         "test {pre_byte_count:e}, {pre_byte_count:e}",
         "add $7, %rsi",
@@ -212,7 +212,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
     let x = {
         let r;
         asm!(
-            "movdqa ({addr}), {dest}",
+            "movdqa ({addr:r}), {dest}",
             addr = in(reg) s,
             dest = out(xmm_reg) r,
             options(att_syntax, nostack),
@@ -232,7 +232,7 @@ pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
         let x = {
             let r;
             asm!(
-                "movdqa ({addr}), {dest}",
+                "movdqa ({addr:r}), {dest}",
                 addr = in(reg) s,
                 dest = out(xmm_reg) r,
                 options(att_syntax, nostack),

From d1d8fb29fe06adb56176d45c2dd810153a5ce006 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 9 Jun 2025 04:10:24 +0000
Subject: [PATCH 061/133] compiler-builtins: Emit `rustc-check-cfg` earlier

The `build.rs` entrypoint returns early for some targets, so emscripten
and OpenBSD were not getting check-cfg set. Emit these earlier to avoid
the `unexpected_cfgs` lint.
---
 compiler-builtins/build.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index d37fdc5df..7c8da02fd 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -22,6 +22,9 @@ fn main() {
 
     println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display());
 
+    println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
+    println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
+
     // Emscripten's runtime includes all the builtins
     if target.os == "emscripten" {
         return;
@@ -47,7 +50,6 @@ fn main() {
     }
 
     // These targets have hardware unaligned access support.
-    println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
     if target.arch.contains("x86_64")
         || target.arch.contains("x86")
         || target.arch.contains("aarch64")
@@ -78,7 +80,6 @@ fn main() {
     // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
     // includes the old androideabi. It is deprecated but it is available as a
     // rustc target (arm-linux-androideabi).
-    println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
     if llvm_target[0] == "armv4t"
         || llvm_target[0] == "armv5te"
         || target.triple == "arm-linux-androideabi"

From 20510166d01c0fb6fbe8927dd53f73fb13e95d46 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 10 Jun 2025 06:49:28 +0000
Subject: [PATCH 062/133] compiler-builtins: Remove unused `lints.rust` table

The unexpected configs are now unused or known to `rustc` in our CI.
---
 compiler-builtins/Cargo.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 11ee91954..eabb3d625 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -55,7 +55,3 @@ rustc-dep-of-std = ["compiler-builtins", "dep:core"]
 # This makes certain traits and function specializations public that
 # are not normally public but are required by the `builtins-test`
 unstable-public-internals = []
-
-[lints.rust]
-# The cygwin config can be dropped after our benchmark toolchain is bumped
-unexpected_cfgs = { level = "warn", check-cfg = ['cfg(bootstrap)', 'cfg(target_os, values("cygwin"))'] }

From add44a716f996401dac618417519ac31185b80c8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 10 Jun 2025 07:02:57 +0000
Subject: [PATCH 063/133] ci: Fix a typo that was causing a command failure

---
 ci/bench-icount.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 5724955fe..d2baebb52 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -57,7 +57,7 @@ function run_icount_benchmarks() {
         # Disregard regressions after merge
         echo "Benchmarks completed with regressions; ignoring (not in a PR)"
     else
-        ./ci/ci-util.py handle-banch-regressions "$PR_NUMBER"
+        ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER"
     fi
 }
 

From 319637f544d9dda8fc3dd482d9979e0da135a258 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 10 Jun 2025 00:59:09 +0200
Subject: [PATCH 064/133] add a fixme to use `extern_custom` when available

---
 compiler-builtins/src/probestack.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index c9070cf55..16faaa67f 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -125,6 +125,9 @@ macro_rules! define_rust_probestack {
 // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
 // ensuring that if any pages are unmapped we'll make a page fault.
 //
+// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
+// it does not actually match `extern "C"`.
+//
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
 //
@@ -260,6 +263,9 @@ core::arch::global_asm!(
 // that on Unix we're expected to restore everything as it was, this
 // function basically can't tamper with anything.
 //
+// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
+// it does not actually match `extern "C"`.
+//
 // The ABI here is the same as x86_64, except everything is 32-bits large.
 core::arch::global_asm!(
     define_rust_probestack!(
@@ -303,6 +309,9 @@ core::arch::global_asm!(
 // probestack function will also do things like _chkstk in MSVC.
 // So we need to sub %ax %sp in probestack when arch is x86.
 //
+// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
+// it does not actually match `extern "C"`.
+//
 // REF: Rust commit(74e80468347)
 // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
 // Comments in LLVM:

From 0f8e54c0a0431d5925bf47a0469239ae00f1cc4f Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 4 Jun 2025 01:31:34 +0200
Subject: [PATCH 065/133] use `#[naked]` for `__rust_probestack`

---
 compiler-builtins/src/lib.rs        |   1 +
 compiler-builtins/src/probestack.rs | 129 +++++++---------------------
 2 files changed, 33 insertions(+), 97 deletions(-)

diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index 6a6b28067..6549d4cef 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -8,6 +8,7 @@
 #![feature(linkage)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
+#![feature(rustc_attrs)]
 #![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![no_builtins]
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index 16faaa67f..e9a26dff1 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -49,79 +49,6 @@
 // We only define stack probing for these architectures today.
 #![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 
-// SAFETY: defined in this module.
-// FIXME(extern_custom): the ABI is not correct.
-unsafe extern "C" {
-    pub fn __rust_probestack();
-}
-
-// A wrapper for our implementation of __rust_probestack, which allows us to
-// keep the assembly inline while controlling all CFI directives in the assembly
-// emitted for the function.
-//
-// This is the ELF version.
-#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .pushsection .text.__rust_probestack
-            .globl __rust_probestack
-            .type  __rust_probestack, @function
-            .hidden __rust_probestack
-        __rust_probestack:
-            ",
-            $body,
-            "
-            .size __rust_probestack, . - __rust_probestack
-            .popsection
-            "
-        )
-    };
-}
-
-#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl __rust_probestack
-        __rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
-// Same as above, but for Mach-O. Note that the triple underscore
-// is deliberate
-#[cfg(target_vendor = "apple")]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl ___rust_probestack
-        ___rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
-// In UEFI x86 arch, triple underscore is deliberate.
-#[cfg(all(target_os = "uefi", target_arch = "x86"))]
-macro_rules! define_rust_probestack {
-    ($body: expr) => {
-        concat!(
-            "
-            .globl ___rust_probestack
-        ___rust_probestack:
-            ",
-            $body
-        )
-    };
-}
-
 // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
 // ensuring that if any pages are unmapped we'll make a page fault.
 //
@@ -136,8 +63,10 @@ macro_rules! define_rust_probestack {
     target_arch = "x86_64",
     not(all(target_env = "sgx", target_vendor = "fortanix"))
 ))]
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "C" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
     .cfi_startproc
     pushq  %rbp
@@ -187,10 +116,10 @@ core::arch::global_asm!(
     .cfi_adjust_cfa_offset -8
     ret
     .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+    ",
+        options(att_syntax)
+    )
+}
 
 // This function is the same as above, except that some instructions are
 // [manually patched for LVI].
@@ -200,8 +129,10 @@ core::arch::global_asm!(
     target_arch = "x86_64",
     all(target_env = "sgx", target_vendor = "fortanix")
 ))]
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[no_mangle]
+pub unsafe extern "C" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
     .cfi_startproc
     pushq  %rbp
@@ -253,10 +184,10 @@ core::arch::global_asm!(
     lfence
     jmp *%r11
     .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+    ",
+        options(att_syntax)
+    )
+}
 
 #[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
 // This is the same as x86_64 above, only translated for 32-bit sizes. Note
@@ -267,8 +198,10 @@ core::arch::global_asm!(
 // it does not actually match `extern "C"`.
 //
 // The ABI here is the same as x86_64, except everything is 32-bits large.
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "C" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
     .cfi_startproc
     push   %ebp
@@ -299,10 +232,10 @@ core::arch::global_asm!(
     .cfi_adjust_cfa_offset -4
     ret
     .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+    ",
+        options(att_syntax)
+    )
+}
 
 #[cfg(all(target_arch = "x86", target_os = "uefi"))]
 // UEFI target is windows like target. LLVM will do _chkstk things like windows.
@@ -318,8 +251,10 @@ core::arch::global_asm!(
 //   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
 //   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
 //   themselves.
-core::arch::global_asm!(
-    define_rust_probestack!(
+#[unsafe(naked)]
+#[rustc_std_internal_symbol]
+pub unsafe extern "C" fn __rust_probestack() {
+    core::arch::naked_asm!(
         "
     .cfi_startproc
     push   %ebp
@@ -355,7 +290,7 @@ core::arch::global_asm!(
     .cfi_adjust_cfa_offset -4
     ret
     .cfi_endproc
-    "
-    ),
-    options(att_syntax)
-);
+    ",
+        options(att_syntax)
+    )
+}

From f9395626288ff91e8d5499207f14fd57c2a16498 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 4 Jun 2025 01:32:04 +0200
Subject: [PATCH 066/133] merge the sgx/fortanix `__rust_probestack` into the
 general `x86_64` one

---
 compiler-builtins/src/probestack.rs | 96 +++++++----------------------
 1 file changed, 23 insertions(+), 73 deletions(-)

diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index e9a26dff1..2375107e3 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -57,15 +57,31 @@
 //
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
-//
-// Any changes to this function should be replicated to the SGX version below.
-#[cfg(all(
-    target_arch = "x86_64",
-    not(all(target_env = "sgx", target_vendor = "fortanix"))
-))]
+#[cfg(target_arch = "x86_64")]
 #[unsafe(naked)]
 #[rustc_std_internal_symbol]
 pub unsafe extern "C" fn __rust_probestack() {
+    #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))]
+    macro_rules! ret {
+        () => {
+            "ret"
+        };
+    }
+
+    #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))]
+    macro_rules! ret {
+        // for this target, [manually patch for LVI].
+        //
+        // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+        () => {
+            "
+            pop %r11
+            lfence
+            jmp *%r11
+            "
+        };
+    }
+
     core::arch::naked_asm!(
         "
     .cfi_startproc
@@ -114,75 +130,9 @@ pub unsafe extern "C" fn __rust_probestack() {
     leave
     .cfi_def_cfa_register %rsp
     .cfi_adjust_cfa_offset -8
-    ret
-    .cfi_endproc
     ",
-        options(att_syntax)
-    )
-}
-
-// This function is the same as above, except that some instructions are
-// [manually patched for LVI].
-//
-// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
-#[cfg(all(
-    target_arch = "x86_64",
-    all(target_env = "sgx", target_vendor = "fortanix")
-))]
-#[unsafe(naked)]
-#[no_mangle]
-pub unsafe extern "C" fn __rust_probestack() {
-    core::arch::naked_asm!(
+        ret!(),
         "
-    .cfi_startproc
-    pushq  %rbp
-    .cfi_adjust_cfa_offset 8
-    .cfi_offset %rbp, -16
-    movq   %rsp, %rbp
-    .cfi_def_cfa_register %rbp
-
-    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-    // Main loop, taken in one page increments. We're decrementing rsp by
-    // a page each time until there's less than a page remaining. We're
-    // guaranteed that this function isn't called unless there's more than a
-    // page needed.
-    //
-    // Note that we're also testing against `8(%rsp)` to account for the 8
-    // bytes pushed on the stack orginally with our return address. Using
-    // `8(%rsp)` simulates us testing the stack pointer in the caller's
-    // context.
-
-    // It's usually called when %rax >= 0x1000, but that's not always true.
-    // Dynamic stack allocation, which is needed to implement unsized
-    // rvalues, triggers stackprobe even if %rax < 0x1000.
-    // Thus we have to check %r11 first to avoid segfault.
-    cmp    $0x1000,%r11
-    jna    3f
-2:
-    sub    $0x1000,%rsp
-    test   %rsp,8(%rsp)
-    sub    $0x1000,%r11
-    cmp    $0x1000,%r11
-    ja     2b
-
-3:
-    // Finish up the last remaining stack space requested, getting the last
-    // bits out of r11
-    sub    %r11,%rsp
-    test   %rsp,8(%rsp)
-
-    // Restore the stack pointer to what it previously was when entering
-    // this function. The caller will readjust the stack pointer after we
-    // return.
-    add    %rax,%rsp
-
-    leave
-    .cfi_def_cfa_register %rsp
-    .cfi_adjust_cfa_offset -8
-    pop %r11
-    lfence
-    jmp *%r11
     .cfi_endproc
     ",
         options(att_syntax)

From c885ce088cf8e5ed65478226565d58976e8aa129 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 4 Jun 2025 01:33:16 +0200
Subject: [PATCH 067/133] indent the probestack inline assembly

---
 compiler-builtins/src/probestack.rs | 220 ++++++++++++++--------------
 1 file changed, 110 insertions(+), 110 deletions(-)

diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index 2375107e3..1441fd73b 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -84,56 +84,56 @@ pub unsafe extern "C" fn __rust_probestack() {
 
     core::arch::naked_asm!(
         "
-    .cfi_startproc
-    pushq  %rbp
-    .cfi_adjust_cfa_offset 8
-    .cfi_offset %rbp, -16
-    movq   %rsp, %rbp
-    .cfi_def_cfa_register %rbp
-
-    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
-
-    // Main loop, taken in one page increments. We're decrementing rsp by
-    // a page each time until there's less than a page remaining. We're
-    // guaranteed that this function isn't called unless there's more than a
-    // page needed.
-    //
-    // Note that we're also testing against `8(%rsp)` to account for the 8
-    // bytes pushed on the stack orginally with our return address. Using
-    // `8(%rsp)` simulates us testing the stack pointer in the caller's
-    // context.
-
-    // It's usually called when %rax >= 0x1000, but that's not always true.
-    // Dynamic stack allocation, which is needed to implement unsized
-    // rvalues, triggers stackprobe even if %rax < 0x1000.
-    // Thus we have to check %r11 first to avoid segfault.
-    cmp    $0x1000,%r11
-    jna    3f
-2:
-    sub    $0x1000,%rsp
-    test   %rsp,8(%rsp)
-    sub    $0x1000,%r11
-    cmp    $0x1000,%r11
-    ja     2b
-
-3:
-    // Finish up the last remaining stack space requested, getting the last
-    // bits out of r11
-    sub    %r11,%rsp
-    test   %rsp,8(%rsp)
-
-    // Restore the stack pointer to what it previously was when entering
-    // this function. The caller will readjust the stack pointer after we
-    // return.
-    add    %rax,%rsp
-
-    leave
-    .cfi_def_cfa_register %rsp
-    .cfi_adjust_cfa_offset -8
+            .cfi_startproc
+            pushq  %rbp
+            .cfi_adjust_cfa_offset 8
+            .cfi_offset %rbp, -16
+            movq   %rsp, %rbp
+            .cfi_def_cfa_register %rbp
+
+            mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+            // Main loop, taken in one page increments. We're decrementing rsp by
+            // a page each time until there's less than a page remaining. We're
+            // guaranteed that this function isn't called unless there's more than a
+            // page needed.
+            //
+            // Note that we're also testing against `8(%rsp)` to account for the 8
+            // bytes pushed on the stack orginally with our return address. Using
+            // `8(%rsp)` simulates us testing the stack pointer in the caller's
+            // context.
+
+            // It's usually called when %rax >= 0x1000, but that's not always true.
+            // Dynamic stack allocation, which is needed to implement unsized
+            // rvalues, triggers stackprobe even if %rax < 0x1000.
+            // Thus we have to check %r11 first to avoid segfault.
+            cmp    $0x1000,%r11
+            jna    3f
+        2:
+            sub    $0x1000,%rsp
+            test   %rsp,8(%rsp)
+            sub    $0x1000,%r11
+            cmp    $0x1000,%r11
+            ja     2b
+
+        3:
+            // Finish up the last remaining stack space requested, getting the last
+            // bits out of r11
+            sub    %r11,%rsp
+            test   %rsp,8(%rsp)
+
+            // Restore the stack pointer to what it previously was when entering
+            // this function. The caller will readjust the stack pointer after we
+            // return.
+            add    %rax,%rsp
+
+            leave
+            .cfi_def_cfa_register %rsp
+            .cfi_adjust_cfa_offset -8
     ",
         ret!(),
         "
-    .cfi_endproc
+            .cfi_endproc
     ",
         options(att_syntax)
     )
@@ -153,35 +153,35 @@ pub unsafe extern "C" fn __rust_probestack() {
 pub unsafe extern "C" fn __rust_probestack() {
     core::arch::naked_asm!(
         "
-    .cfi_startproc
-    push   %ebp
-    .cfi_adjust_cfa_offset 4
-    .cfi_offset %ebp, -8
-    mov    %esp, %ebp
-    .cfi_def_cfa_register %ebp
-    push   %ecx
-    mov    %eax,%ecx
-
-    cmp    $0x1000,%ecx
-    jna    3f
-2:
-    sub    $0x1000,%esp
-    test   %esp,8(%esp)
-    sub    $0x1000,%ecx
-    cmp    $0x1000,%ecx
-    ja     2b
-
-3:
-    sub    %ecx,%esp
-    test   %esp,8(%esp)
-
-    add    %eax,%esp
-    pop    %ecx
-    leave
-    .cfi_def_cfa_register %esp
-    .cfi_adjust_cfa_offset -4
-    ret
-    .cfi_endproc
+            .cfi_startproc
+            push   %ebp
+            .cfi_adjust_cfa_offset 4
+            .cfi_offset %ebp, -8
+            mov    %esp, %ebp
+            .cfi_def_cfa_register %ebp
+            push   %ecx
+            mov    %eax,%ecx
+
+            cmp    $0x1000,%ecx
+            jna    3f
+        2:
+            sub    $0x1000,%esp
+            test   %esp,8(%esp)
+            sub    $0x1000,%ecx
+            cmp    $0x1000,%ecx
+            ja     2b
+
+        3:
+            sub    %ecx,%esp
+            test   %esp,8(%esp)
+
+            add    %eax,%esp
+            pop    %ecx
+            leave
+            .cfi_def_cfa_register %esp
+            .cfi_adjust_cfa_offset -4
+            ret
+            .cfi_endproc
     ",
         options(att_syntax)
     )
@@ -206,40 +206,40 @@ pub unsafe extern "C" fn __rust_probestack() {
 pub unsafe extern "C" fn __rust_probestack() {
     core::arch::naked_asm!(
         "
-    .cfi_startproc
-    push   %ebp
-    .cfi_adjust_cfa_offset 4
-    .cfi_offset %ebp, -8
-    mov    %esp, %ebp
-    .cfi_def_cfa_register %ebp
-    push   %ecx
-    push   %edx
-    mov    %eax,%ecx
-
-    cmp    $0x1000,%ecx
-    jna    3f
-2:
-    sub    $0x1000,%esp
-    test   %esp,8(%esp)
-    sub    $0x1000,%ecx
-    cmp    $0x1000,%ecx
-    ja     2b
-
-3:
-    sub    %ecx,%esp
-    test   %esp,8(%esp)
-    mov    4(%ebp),%edx
-    mov    %edx, 12(%esp)
-    add    %eax,%esp
-    pop    %edx
-    pop    %ecx
-    leave
-
-    sub   %eax, %esp
-    .cfi_def_cfa_register %esp
-    .cfi_adjust_cfa_offset -4
-    ret
-    .cfi_endproc
+            .cfi_startproc
+            push   %ebp
+            .cfi_adjust_cfa_offset 4
+            .cfi_offset %ebp, -8
+            mov    %esp, %ebp
+            .cfi_def_cfa_register %ebp
+            push   %ecx
+            push   %edx
+            mov    %eax,%ecx
+
+            cmp    $0x1000,%ecx
+            jna    3f
+        2:
+            sub    $0x1000,%esp
+            test   %esp,8(%esp)
+            sub    $0x1000,%ecx
+            cmp    $0x1000,%ecx
+            ja     2b
+
+        3:
+            sub    %ecx,%esp
+            test   %esp,8(%esp)
+            mov    4(%ebp),%edx
+            mov    %edx, 12(%esp)
+            add    %eax,%esp
+            pop    %edx
+            pop    %ecx
+            leave
+
+            sub   %eax, %esp
+            .cfi_def_cfa_register %esp
+            .cfi_adjust_cfa_offset -4
+            ret
+            .cfi_endproc
     ",
         options(att_syntax)
     )

From 2f01db9bf56b5d559b53e601b537e2053905e03e Mon Sep 17 00:00:00 2001
From: qinghon <wushengshijie@outlook.com>
Date: Fri, 13 Jun 2025 13:27:47 +0800
Subject: [PATCH 068/133] Eliminate `build.rs`-generated Aarch64 atomic macros
 (#951)

Replace `build.rs` Rust generation with macros, using the unstable
`${concat(...)}`.

Fixes: https://github.com/rust-lang/compiler-builtins/issues/947
---
 builtins-test/tests/lse.rs             |  3 +-
 compiler-builtins/build.rs             | 62 ---------------------
 compiler-builtins/src/aarch64_linux.rs | 74 +++++++++++++++++++++++++-
 compiler-builtins/src/lib.rs           |  1 +
 4 files changed, 75 insertions(+), 65 deletions(-)

diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs
index 53167d98f..0d85228d7 100644
--- a/builtins-test/tests/lse.rs
+++ b/builtins-test/tests/lse.rs
@@ -1,4 +1,5 @@
 #![feature(decl_macro)] // so we can use pub(super)
+#![feature(macro_metavar_expr_concat)]
 #![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))]
 
 /// Translate a byte size to a Rust type.
@@ -87,7 +88,7 @@ test_op!(add, |left, right| left.wrapping_add(right));
 test_op!(clr, |left, right| left & !right);
 test_op!(xor, std::ops::BitXor::bitxor);
 test_op!(or, std::ops::BitOr::bitor);
-
+use compiler_builtins::{foreach_bytes, foreach_ordering};
 compiler_builtins::foreach_cas!(cas::test);
 compiler_builtins::foreach_cas16!(test_cas16);
 compiler_builtins::foreach_swp!(swap::test);
diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index d37fdc5df..e909a0dcb 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -1,9 +1,6 @@
 mod configure;
 
-use std::collections::BTreeMap;
 use std::env;
-use std::path::PathBuf;
-use std::sync::atomic::Ordering;
 
 use configure::{Target, configure_aliases, configure_f16_f128};
 
@@ -85,10 +82,6 @@ fn main() {
     {
         println!("cargo:rustc-cfg=kernel_user_helpers")
     }
-
-    if llvm_target[0].starts_with("aarch64") {
-        generate_aarch64_outlined_atomics();
-    }
 }
 
 /// Run configuration for `libm` since it is included directly.
@@ -131,61 +124,6 @@ fn configure_libm(target: &Target) {
     println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\"");
 }
 
-fn aarch64_symbol(ordering: Ordering) -> &'static str {
-    match ordering {
-        Ordering::Relaxed => "relax",
-        Ordering::Acquire => "acq",
-        Ordering::Release => "rel",
-        Ordering::AcqRel => "acq_rel",
-        _ => panic!("unknown symbol for {ordering:?}"),
-    }
-}
-
-/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
-/// Define them from the build script instead.
-/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
-fn generate_aarch64_outlined_atomics() {
-    use std::fmt::Write;
-    // #[macro_export] so that we can use this in tests
-    let gen_macro =
-        |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");
-
-    // Generate different macros for add/clr/eor/set so that we can test them separately.
-    let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"];
-    let mut macros = BTreeMap::new();
-    for sym in sym_names {
-        macros.insert(sym, gen_macro(sym));
-    }
-
-    // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
-    let mut cas16 = gen_macro("cas16");
-
-    for ordering in [
-        Ordering::Relaxed,
-        Ordering::Acquire,
-        Ordering::Release,
-        Ordering::AcqRel,
-    ] {
-        let sym_ordering = aarch64_symbol(ordering);
-        for size in [1, 2, 4, 8] {
-            for (sym, macro_) in &mut macros {
-                let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
-                writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
-            }
-        }
-        let name = format!("__aarch64_cas16_{sym_ordering}");
-        writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
-    }
-
-    let mut buf = String::new();
-    for macro_def in macros.values().chain(std::iter::once(&cas16)) {
-        buf += macro_def;
-        buf += "}; }\n";
-    }
-    let out_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
-    std::fs::write(out_dir.join("outlined_atomics.rs"), buf).unwrap();
-}
-
 /// Emit directives for features we expect to support that aren't in `Cargo.toml`.
 ///
 /// These are mostly cfg elements emitted by this `build.rs`.
diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs
index e238d0237..2402a3fe1 100644
--- a/compiler-builtins/src/aarch64_linux.rs
+++ b/compiler-builtins/src/aarch64_linux.rs
@@ -262,8 +262,78 @@ macro_rules! or {
     };
 }
 
-// See `generate_aarch64_outlined_atomics` in build.rs.
-include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
+#[macro_export]
+macro_rules! foreach_ordering {
+    ($macro:path, $bytes:tt, $name:ident) => {
+        $macro!( Relaxed, $bytes, ${concat($name, _relax)} );
+        $macro!( Acquire, $bytes, ${concat($name, _acq)} );
+        $macro!( Release, $bytes, ${concat($name, _rel)} );
+        $macro!( AcqRel, $bytes, ${concat($name, _acq_rel)} );
+    };
+    ($macro:path, $name:ident) => {
+        $macro!( Relaxed, ${concat($name, _relax)} );
+        $macro!( Acquire, ${concat($name, _acq)} );
+        $macro!( Release, ${concat($name, _rel)} );
+        $macro!( AcqRel, ${concat($name, _acq_rel)} );
+    };
+}
+
+#[macro_export]
+macro_rules! foreach_bytes {
+    ($macro:path, $name:ident) => {
+        foreach_ordering!( $macro, 1, ${concat(__aarch64_, $name, "1")} );
+        foreach_ordering!( $macro, 2, ${concat(__aarch64_, $name, "2")} );
+        foreach_ordering!( $macro, 4, ${concat(__aarch64_, $name, "4")} );
+        foreach_ordering!( $macro, 8, ${concat(__aarch64_, $name, "8")} );
+    };
+}
+
+/// Generate different macros for cas/swp/add/clr/eor/set so that we can test them separately.
+#[macro_export]
+macro_rules! foreach_cas {
+    ($macro:path) => {
+        foreach_bytes!($macro, cas);
+    };
+}
+
+/// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
+#[macro_export]
+macro_rules! foreach_cas16 {
+    ($macro:path) => {
+        foreach_ordering!($macro, __aarch64_cas16);
+    };
+}
+#[macro_export]
+macro_rules! foreach_swp {
+    ($macro:path) => {
+        foreach_bytes!($macro, swp);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldadd {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldadd);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldclr {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldclr);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldeor {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldeor);
+    };
+}
+#[macro_export]
+macro_rules! foreach_ldset {
+    ($macro:path) => {
+        foreach_bytes!($macro, ldset);
+    };
+}
+
 foreach_cas!(compare_and_swap);
 foreach_cas16!(compare_and_swap_i128);
 foreach_swp!(swap);
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index 6a6b28067..ef3299d69 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -8,6 +8,7 @@
 #![feature(linkage)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
+#![feature(macro_metavar_expr_concat)]
 #![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 #![no_builtins]

From f6a23a78c44e96780de730d419c7f8b0afebfb34 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 2 Jun 2025 17:20:22 +0000
Subject: [PATCH 069/133] fmaximum,fminimum: Fix incorrect result and add tests

After adding tests, the current implementation for fminimum fails when
provided a negative zero and NaN as inputs:

    ---- math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f64 stdout ----

    thread 'math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f64' panicked at libm/src/math/fminimum_fmaximum_num.rs:240:13:
    fmaximum_num(-0x0p+0, NaN)
    l: NaN (0x7ff8000000000000)
    r: -0.0 (0x8000000000000000)

    ---- math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f32 stdout ----

    thread 'math::fminimum_fmaximum_num::tests::fmaximum_num_spec_tests_f32' panicked at libm/src/math/fminimum_fmaximum_num.rs:240:13:
    fmaximum_num(-0x0p+0, NaN)
    l: NaN (0x7fc00000)
    r: -0.0 (0x80000000)

Add more thorough spec tests for these functions and correct the
implementations.

Canonicalization is also moved to a trait method to centralize
documentation about what it does and doesn't do.
---
 libm/src/math/fmin_fmax.rs             | 122 ++++++++++++++++++++--
 libm/src/math/fminimum_fmaximum.rs     | 126 ++++++++++++++++++++--
 libm/src/math/fminimum_fmaximum_num.rs | 138 ++++++++++++++++++++++---
 libm/src/math/generic/fmax.rs          |   3 +-
 libm/src/math/generic/fmaximum.rs      |   5 +-
 libm/src/math/generic/fmaximum_num.rs  |  17 +--
 libm/src/math/generic/fmin.rs          |   3 +-
 libm/src/math/generic/fminimum.rs      |   5 +-
 libm/src/math/generic/fminimum_num.rs  |  17 +--
 libm/src/math/support/float_traits.rs  |   9 ++
 libm/src/math/support/macros.rs        |   4 +-
 11 files changed, 392 insertions(+), 57 deletions(-)

diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs
index 2947b783e..481301994 100644
--- a/libm/src/math/fmin_fmax.rs
+++ b/libm/src/math/fmin_fmax.rs
@@ -82,22 +82,77 @@ mod tests {
     fn fmin_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between zeros and NaNs does not matter
+        assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO);
+        assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -125,22 +180,77 @@ mod tests {
     fn fmax_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmax({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between zeros and NaNs does not matter
+        assert_eq!(f(F::ZERO, F::NEG_ZERO), F::ZERO);
+        assert_eq!(f(F::NEG_ZERO, F::ZERO), F::ZERO);
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs
index b7999e273..8f1308670 100644
--- a/libm/src/math/fminimum_fmaximum.rs
+++ b/libm/src/math/fminimum_fmaximum.rs
@@ -74,24 +74,77 @@ mod tests {
     fn fminimum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NAN),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::NAN),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NAN),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::NAN),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NAN),
             (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NEG_ZERO, F::NAN),
+            (F::NAN, F::ONE, F::NAN),
+            (F::NAN, F::NEG_ONE, F::NAN),
+            (F::NAN, F::INFINITY, F::NAN),
+            (F::NAN, F::NEG_INFINITY, F::NAN),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fminimum({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -119,24 +172,77 @@ mod tests {
     fn fmaximum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::NAN),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NAN),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::NAN),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NAN),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::NAN),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NAN),
             (F::NAN, F::ZERO, F::NAN),
-            (F::ZERO, F::NAN, F::NAN),
+            (F::NAN, F::NEG_ZERO, F::NAN),
+            (F::NAN, F::ONE, F::NAN),
+            (F::NAN, F::NEG_ONE, F::NAN),
+            (F::NAN, F::INFINITY, F::NAN),
+            (F::NAN, F::NEG_INFINITY, F::NAN),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
         ];
 
         for (x, y, res) in cases {
             let val = f(x, y);
             assert_biteq!(val, res, "fmaximum({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ZERO, F::NEG_NAN).is_nan());
+        assert!(f(F::ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_ONE, F::NEG_NAN).is_nan());
+        assert!(f(F::INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_INFINITY, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ZERO).is_nan());
+        assert!(f(F::NEG_NAN, F::ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_ONE).is_nan());
+        assert!(f(F::NEG_NAN, F::INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_INFINITY).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs
index 180d21f72..fadf93418 100644
--- a/libm/src/math/fminimum_fmaximum_num.rs
+++ b/libm/src/math/fminimum_fmaximum_num.rs
@@ -74,24 +74,77 @@ mod tests {
     fn fminimum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
             (F::ZERO, F::ONE, F::ZERO),
-            (F::ONE, F::ZERO, F::ZERO),
             (F::ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::ZERO, F::INFINITY, F::ZERO),
+            (F::ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ZERO, F::INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ZERO),
+            (F::ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::ONE, F::INFINITY, F::ONE),
+            (F::ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ONE),
+            (F::NEG_ONE, F::ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::ZERO),
+            (F::INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::INFINITY, F::ONE, F::ONE),
+            (F::INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::NEG_ZERO),
-            (F::NEG_ZERO, F::ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
-        for (x, y, res) in cases {
-            let val = f(x, y);
-            assert_biteq!(val, res, "fminimum_num({}, {})", Hexf(x), Hexf(y));
+        for (x, y, expected) in cases {
+            let actual = f(x, y);
+            assert_biteq!(actual, expected, "fminimum_num({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
@@ -119,24 +172,77 @@ mod tests {
     fn fmaximum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
             (F::ZERO, F::ZERO, F::ZERO),
-            (F::ONE, F::ONE, F::ONE),
+            (F::ZERO, F::NEG_ZERO, F::ZERO),
             (F::ZERO, F::ONE, F::ONE),
-            (F::ONE, F::ZERO, F::ONE),
             (F::ZERO, F::NEG_ONE, F::ZERO),
+            (F::ZERO, F::INFINITY, F::INFINITY),
+            (F::ZERO, F::NEG_INFINITY, F::ZERO),
+            (F::ZERO, F::NAN, F::ZERO),
+            (F::ZERO, F::NEG_NAN, F::ZERO),
+            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_ZERO, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ZERO, F::ONE, F::ONE),
+            (F::NEG_ZERO, F::NEG_ONE, F::NEG_ZERO),
+            (F::NEG_ZERO, F::INFINITY, F::INFINITY),
+            (F::NEG_ZERO, F::NEG_INFINITY, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NAN, F::NEG_ZERO),
+            (F::NEG_ZERO, F::NEG_NAN, F::NEG_ZERO),
+            (F::ONE, F::ZERO, F::ONE),
+            (F::ONE, F::NEG_ZERO, F::ONE),
+            (F::ONE, F::ONE, F::ONE),
+            (F::ONE, F::NEG_ONE, F::ONE),
+            (F::ONE, F::INFINITY, F::INFINITY),
+            (F::ONE, F::NEG_INFINITY, F::ONE),
+            (F::ONE, F::NAN, F::ONE),
+            (F::ONE, F::NEG_NAN, F::ONE),
             (F::NEG_ONE, F::ZERO, F::ZERO),
+            (F::NEG_ONE, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_ONE, F::ONE, F::ONE),
+            (F::NEG_ONE, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_ONE, F::INFINITY, F::INFINITY),
+            (F::NEG_ONE, F::NEG_INFINITY, F::NEG_ONE),
+            (F::NEG_ONE, F::NAN, F::NEG_ONE),
+            (F::NEG_ONE, F::NEG_NAN, F::NEG_ONE),
             (F::INFINITY, F::ZERO, F::INFINITY),
+            (F::INFINITY, F::NEG_ZERO, F::INFINITY),
+            (F::INFINITY, F::ONE, F::INFINITY),
+            (F::INFINITY, F::NEG_ONE, F::INFINITY),
+            (F::INFINITY, F::INFINITY, F::INFINITY),
+            (F::INFINITY, F::NEG_INFINITY, F::INFINITY),
+            (F::INFINITY, F::NAN, F::INFINITY),
+            (F::INFINITY, F::NEG_NAN, F::INFINITY),
             (F::NEG_INFINITY, F::ZERO, F::ZERO),
+            (F::NEG_INFINITY, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_INFINITY, F::ONE, F::ONE),
+            (F::NEG_INFINITY, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_INFINITY, F::INFINITY, F::INFINITY),
+            (F::NEG_INFINITY, F::NEG_INFINITY, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NAN, F::NEG_INFINITY),
+            (F::NEG_INFINITY, F::NEG_NAN, F::NEG_INFINITY),
             (F::NAN, F::ZERO, F::ZERO),
-            (F::ZERO, F::NAN, F::ZERO),
+            (F::NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NAN, F::ONE, F::ONE),
+            (F::NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NAN, F::INFINITY, F::INFINITY),
+            (F::NAN, F::NEG_INFINITY, F::NEG_INFINITY),
             (F::NAN, F::NAN, F::NAN),
-            (F::ZERO, F::NEG_ZERO, F::ZERO),
-            (F::NEG_ZERO, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::ZERO, F::ZERO),
+            (F::NEG_NAN, F::NEG_ZERO, F::NEG_ZERO),
+            (F::NEG_NAN, F::ONE, F::ONE),
+            (F::NEG_NAN, F::NEG_ONE, F::NEG_ONE),
+            (F::NEG_NAN, F::INFINITY, F::INFINITY),
+            (F::NEG_NAN, F::NEG_INFINITY, F::NEG_INFINITY),
         ];
 
-        for (x, y, res) in cases {
-            let val = f(x, y);
-            assert_biteq!(val, res, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
+        for (x, y, expected) in cases {
+            let actual = f(x, y);
+            assert_biteq!(actual, expected, "fmaximum_num({}, {})", Hexf(x), Hexf(y));
         }
+
+        // Ordering between NaNs does not matter
+        assert!(f(F::NAN, F::NEG_NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NAN).is_nan());
+        assert!(f(F::NEG_NAN, F::NEG_NAN).is_nan());
     }
 
     #[test]
diff --git a/libm/src/math/generic/fmax.rs b/libm/src/math/generic/fmax.rs
index 54207e4b3..b05804704 100644
--- a/libm/src/math/generic/fmax.rs
+++ b/libm/src/math/generic/fmax.rs
@@ -19,6 +19,5 @@ use crate::support::Float;
 #[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() || x < y { y } else { x };
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmaximum.rs b/libm/src/math/generic/fmaximum.rs
index 898828b80..55a031e18 100644
--- a/libm/src/math/generic/fmaximum.rs
+++ b/libm/src/math/generic/fmaximum.rs
@@ -4,8 +4,8 @@
 //! Per the spec, returns the canonicalized result of:
 //! - `x` if `x > y`
 //! - `y` if `y > x`
+//! - +0.0 if x and y are zero with opposite signs
 //! - qNaN if either operation is NaN
-//! - Logic following +0.0 > -0.0
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -23,6 +23,5 @@ pub fn fmaximum<F: Float>(x: F, y: F) -> F {
         y
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmaximum_num.rs b/libm/src/math/generic/fmaximum_num.rs
index 05df6cbd4..2dc60b2d2 100644
--- a/libm/src/math/generic/fmaximum_num.rs
+++ b/libm/src/math/generic/fmaximum_num.rs
@@ -4,10 +4,10 @@
 //! Per the spec, returns:
 //! - `x` if `x > y`
 //! - `y` if `y > x`
-//! - Non-NaN if one operand is NaN
-//! - Logic following +0.0 > -0.0
+//! - +0.0 if x and y are zero with opposite signs
 //! - Either `x` or `y` if `x == y` and the signs are the same
-//! - qNaN if either operand is a NaN
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaN
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -15,12 +15,15 @@ use crate::support::Float;
 
 #[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
-    let res = if x.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
+    let res = if x > y || y.is_nan() {
+        x
+    } else if y > x || x.is_nan() {
         y
-    } else {
+    } else if x.is_sign_positive() {
         x
+    } else {
+        y
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fmin.rs b/libm/src/math/generic/fmin.rs
index 0f86364d2..e2245bf9e 100644
--- a/libm/src/math/generic/fmin.rs
+++ b/libm/src/math/generic/fmin.rs
@@ -19,6 +19,5 @@ use crate::support::Float;
 #[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
     let res = if y.is_nan() || x < y { x } else { y };
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fminimum.rs b/libm/src/math/generic/fminimum.rs
index 8592ac546..aa68b1291 100644
--- a/libm/src/math/generic/fminimum.rs
+++ b/libm/src/math/generic/fminimum.rs
@@ -4,8 +4,8 @@
 //! Per the spec, returns the canonicalized result of:
 //! - `x` if `x < y`
 //! - `y` if `y < x`
+//! - -0.0 if x and y are zero with opposite signs
 //! - qNaN if either operation is NaN
-//! - Logic following +0.0 > -0.0
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -23,6 +23,5 @@ pub fn fminimum<F: Float>(x: F, y: F) -> F {
         y
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/generic/fminimum_num.rs b/libm/src/math/generic/fminimum_num.rs
index 6777bbf87..265bd4605 100644
--- a/libm/src/math/generic/fminimum_num.rs
+++ b/libm/src/math/generic/fminimum_num.rs
@@ -4,10 +4,10 @@
 //! Per the spec, returns:
 //! - `x` if `x < y`
 //! - `y` if `y < x`
-//! - Non-NaN if one operand is NaN
-//! - Logic following +0.0 > -0.0
+//! - -0.0 if x and y are zero with opposite signs
 //! - Either `x` or `y` if `x == y` and the signs are the same
-//! - qNaN if either operand is a NaN
+//! - Non-NaN if one operand is NaN
+//! - qNaN if both operands are NaN
 //!
 //! Excluded from our implementation is sNaN handling.
 
@@ -15,12 +15,15 @@ use crate::support::Float;
 
 #[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
-    let res = if y.is_nan() || x < y || (x.biteq(F::NEG_ZERO) && y.is_sign_positive()) {
+    let res = if x > y || x.is_nan() {
+        y
+    } else if y > x || y.is_nan() {
         x
-    } else {
+    } else if x.is_sign_positive() {
         y
+    } else {
+        x
     };
 
-    // Canonicalize
-    res * F::ONE
+    res.canonicalize()
 }
diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
index dd9f46209..c3e7eeec2 100644
--- a/libm/src/math/support/float_traits.rs
+++ b/libm/src/math/support/float_traits.rs
@@ -190,6 +190,15 @@ pub trait Float:
             Self::ONE.copysign(self)
         }
     }
+
+    /// Make a best-effort attempt to canonicalize the number. Note that this is allowed
+    /// to be a nop and does not always quiet sNaNs.
+    fn canonicalize(self) -> Self {
+        // FIXME: LLVM often removes this. We should determine whether we can remove the operation,
+        // or switch to something based on `llvm.canonicalize` (which has crashes,
+        // <https://github.com/llvm/llvm-project/issues/32650>).
+        self * Self::ONE
+    }
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
diff --git a/libm/src/math/support/macros.rs b/libm/src/math/support/macros.rs
index 2b8fd580a..550d2e92e 100644
--- a/libm/src/math/support/macros.rs
+++ b/libm/src/math/support/macros.rs
@@ -143,10 +143,12 @@ macro_rules! assert_biteq {
         let bits = $crate::support::Int::leading_zeros(l.to_bits() - l.to_bits());
         assert!(
             $crate::support::Float::biteq(l, r),
-            "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
+            "{}\nl: {l:?} ({lb:#0width$x} {lh})\nr: {r:?} ({rb:#0width$x} {rh})",
             format_args!($($tt)*),
             lb = l.to_bits(),
+            lh = $crate::support::Hexf(l),
             rb = r.to_bits(),
+            rh = $crate::support::Hexf(r),
             width = ((bits / 4) + 2) as usize,
 
         );

From 8fe6945fca528aaded3a464058f44294caab7953 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 13 Jun 2025 15:42:06 +0000
Subject: [PATCH 070/133] Clean up and sort manifest keys

Use a consistent ordering for top-level manifest keys, and remove those
that are now redundant (`homepage` isn't supposed to be the same as
`repository`, and `documentation` automatically points to docs.rs now).
---
 compiler-builtins/Cargo.toml |  9 +++------
 libm/Cargo.toml              | 10 ++++------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index eabb3d625..22e240099 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,14 +1,11 @@
 [package]
-authors = ["Jorge Aparicio <japaricious@gmail.com>"]
 name = "compiler_builtins"
 version = "0.1.160"
-license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
-readme = "README.md"
+authors = ["Jorge Aparicio <japaricious@gmail.com>"]
+description = "Compiler intrinsics used by the Rust compiler."
 repository = "https://github.com/rust-lang/compiler-builtins"
-homepage = "https://github.com/rust-lang/compiler-builtins"
-documentation = "https://docs.rs/compiler_builtins"
+license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 edition = "2024"
-description = "Compiler intrinsics used by the Rust compiler."
 links = "compiler-rt"
 
 [lib]
diff --git a/libm/Cargo.toml b/libm/Cargo.toml
index b6fb5efcf..63b4d3c27 100644
--- a/libm/Cargo.toml
+++ b/libm/Cargo.toml
@@ -1,14 +1,12 @@
 [package]
+name = "libm"
+version = "0.2.15"
 authors = ["Jorge Aparicio <jorge@japaric.io>"]
-categories = ["no-std"]
 description = "libm in pure Rust"
-documentation = "https://docs.rs/libm"
+categories = ["no-std"]
 keywords = ["libm", "math"]
-license = "MIT"
-name = "libm"
-readme = "README.md"
 repository = "https://github.com/rust-lang/compiler-builtins"
-version = "0.2.15"
+license = "MIT"
 edition = "2021"
 rust-version = "1.63"
 

From baa4d3f1492e61ee9c08f52b6cf8e8298a6daa33 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 13 Jun 2025 15:44:45 +0000
Subject: [PATCH 071/133] Mark compiler-builtins as `publish = false`

Now that this repository is a subtree, we have no need to continue
publishing `compiler-builtins`.
---
 compiler-builtins/Cargo.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 22e240099..dffdcaf94 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -6,6 +6,7 @@ description = "Compiler intrinsics used by the Rust compiler."
 repository = "https://github.com/rust-lang/compiler-builtins"
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 edition = "2024"
+publish = false
 links = "compiler-rt"
 
 [lib]

From a18db849f771c94feae009fd6eba39ed571b3756 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 14 Jun 2025 03:38:53 +0000
Subject: [PATCH 072/133] Delete `.release-plz.toml`

The config file is not needed anymore since compiler-builtins is no
longer published. Removing it will resolve a CI failure.
---
 .release-plz.toml | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 .release-plz.toml

diff --git a/.release-plz.toml b/.release-plz.toml
deleted file mode 100644
index 8023ade9b..000000000
--- a/.release-plz.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[workspace]
-# As part of the release process, we delete `libm/Cargo.toml`. Since
-# this is only run in CI, we shouldn't need to worry about it.
-allow_dirty = true
-publish_allow_dirty = true
-
-[[package]]
-name = "compiler_builtins"
-semver_check = false
-changelog_include = ["libm"] # libm is included as part of builtins
-
-[[package]]
-name = "libm"

From fc6b151597c855d3e6f466fdeea945625b29648f Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 14 Jun 2025 04:25:55 +0000
Subject: [PATCH 073/133] Update the upstream Rust version

To prepare for merging from rust-lang/rust, set the version file to:

    d087f112b7 Auto merge of #134841 - estebank:serde-attr-4, r=wesleywiser
---
 rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-version b/rust-version
index e05aaa057..731839835 100644
--- a/rust-version
+++ b/rust-version
@@ -1 +1 @@
-df8102fe5f24f28a918660b0cd918d7331c3896e
+d087f112b7d1323446c7b39a8b616aee7fa56b3d

From 7c46e921c1174e241ab35ec09c76e10867292633 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 14 Jun 2025 06:23:24 +0000
Subject: [PATCH 074/133] Work around out-of-tree testing with a shim crate

Out-of-tree testing is broken with the most recent update from
rust-lang/rust because it makes `compiler-builtins` depend on `core` by
path, which isn't usually available. In order to enable testing outside
of rust-lang/rust, add a new crate `builtins-shim` that uses the same
source as `compiler-builtins` but drops the `core` dependency. This has
replaced `compiler-builtins` as the workspace member and entrypoint for
tests.
---
 Cargo.toml                          |  8 +++-
 builtins-shim/Cargo.toml            | 63 +++++++++++++++++++++++++++++
 builtins-test-intrinsics/Cargo.toml |  2 +-
 builtins-test/Cargo.toml            |  2 +-
 compiler-builtins/Cargo.toml        |  6 +++
 5 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 builtins-shim/Cargo.toml

diff --git a/Cargo.toml b/Cargo.toml
index fb638f2fb..41350c6cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,8 @@
 [workspace]
 resolver = "2"
 members = [
+    "builtins-shim",
     "builtins-test",
-    "compiler-builtins",
     "crates/josh-sync",
     "crates/libm-macros",
     "crates/musl-math-sys",
@@ -14,8 +14,8 @@ members = [
 ]
 
 default-members = [
+    "builtins-shim",
     "builtins-test",
-    "compiler-builtins",
     "crates/libm-macros",
     "libm",
     "libm-test",
@@ -26,6 +26,10 @@ exclude = [
     # and `mangled-names` disabled, which is the opposite of what is needed for
     # other tests, so it makes sense to keep it out of the workspace.
     "builtins-test-intrinsics",
+    # We test via the `builtins-shim` crate, so exclude the `compiler-builtins`
+    # that has a dependency on `core`. See `builtins-shim/Cargo.toml` for more
+    # details.
+    "compiler-builtins",
 ]
 
 [profile.release]
diff --git a/builtins-shim/Cargo.toml b/builtins-shim/Cargo.toml
new file mode 100644
index 000000000..8eb880c6f
--- /dev/null
+++ b/builtins-shim/Cargo.toml
@@ -0,0 +1,63 @@
+# NOTE: Must be kept in sync with `../compiler-builtins/Cargo.toml`.
+#
+# The manifest at `../compiler-builtins` is what actually gets used in the
+# rust-lang/rust tree; however, we can't build it out of tree because it
+# depends on `core` by path, and even optional Cargo dependencies need to be
+# available at build time. So, we work around this by having this "shim"
+# manifest that is identical except for the `core` dependency and forwards
+# to the same sources, which acts as the `compiler-builtins` Cargo entrypoint
+# for out-of-tree testing.
+
+[package]
+name = "compiler_builtins"
+version = "0.1.160"
+authors = ["Jorge Aparicio <japaricious@gmail.com>"]
+description = "Compiler intrinsics used by the Rust compiler."
+repository = "https://github.com/rust-lang/compiler-builtins"
+license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
+edition = "2024"
+publish = false
+links = "compiler-rt"
+
+build = "../compiler-builtins/build.rs"
+
+[lib]
+path = "../compiler-builtins/src/lib.rs"
+bench = false
+doctest = false
+test = false
+
+[build-dependencies]
+cc = { optional = true, version = "1.2" }
+
+[features]
+default = ["compiler-builtins"]
+
+# Enable compilation of C code in compiler-rt, filling in some more optimized
+# implementations and also filling in unimplemented intrinsics
+c = ["dep:cc"]
+
+# Workaround for the Cranelift codegen backend. Disables any implementations
+# which use inline assembly and fall back to pure Rust versions (if available).
+no-asm = []
+
+# Workaround for codegen backends which haven't yet implemented `f16` and
+# `f128` support. Disables any intrinsics which use those types.
+no-f16-f128 = []
+
+# Flag this library as the unstable compiler-builtins lib
+compiler-builtins = []
+
+# Generate memory-related intrinsics like memcpy
+mem = []
+
+# Mangle all names so this can be linked in with other versions or other
+# compiler-rt implementations. Also used for testing
+mangled-names = []
+
+# Only used in the compiler's build system
+rustc-dep-of-std = ["compiler-builtins"]
+
+# This makes certain traits and function specializations public that
+# are not normally public but are required by the `builtins-test`
+unstable-public-internals = []
diff --git a/builtins-test-intrinsics/Cargo.toml b/builtins-test-intrinsics/Cargo.toml
index 064b7cad2..e73a1f7b1 100644
--- a/builtins-test-intrinsics/Cargo.toml
+++ b/builtins-test-intrinsics/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-compiler_builtins = { path = "../compiler-builtins", features = ["compiler-builtins"] }
+compiler_builtins = { path = "../builtins-shim", features = ["compiler-builtins"] }
 panic-handler = { path = "../crates/panic-handler" }
 
 [features]
diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index c7742aa24..093d4633f 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -17,7 +17,7 @@ rustc_apfloat = "0.2.2"
 iai-callgrind = { version = "0.14.1", optional = true }
 
 [dependencies.compiler_builtins]
-path = "../compiler-builtins"
+path = "../builtins-shim"
 default-features = false
 features = ["unstable-public-internals"]
 
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 7276a6851..c5446cd76 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -1,3 +1,9 @@
+# NOTE: Must be kept in sync with `../builtins-shim/Cargo.toml`.
+#
+# This manifest is actually used in-tree by rust-lang/rust,
+# `../builtins-shim/Cargo.toml` is used by out-of-tree testing. See the other
+# manifest for further details.
+
 [package]
 name = "compiler_builtins"
 version = "0.1.160"

From 1e2ebebd36d733c0af067187f310d5dc541e782e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 14 Jun 2025 12:08:25 +0200
Subject: [PATCH 075/133] use `is_multiple_of` to check if an addr is aligned

---
 compiler-builtins/src/arm.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index a7d84e49b..617cc8e50 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -135,8 +135,8 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
     pub unsafe extern "aapcs" fn __aeabi_memcpy8(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
-        debug_assert!(src.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
+        debug_assert!(src.addr().is_multiple_of(8));
 
         // SAFETY: memcpy preconditions apply, less strict alignment.
         unsafe { __aeabi_memcpy4(dst, src, n) };
@@ -161,8 +161,8 @@ intrinsics! {
     /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memmove4(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 3 == 0);
-        debug_assert!(src.addr() & 3 == 0);
+        debug_assert!(dst.addr().is_multiple_of(4));
+        debug_assert!(src.addr().is_multiple_of(4));
 
         // SAFETY: same preconditions, less strict aligment.
         unsafe { __aeabi_memmove(dst, src, n) };
@@ -176,8 +176,8 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memmove8(dst: *mut u8, src: *const u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
-        debug_assert!(src.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
+        debug_assert!(src.addr().is_multiple_of(8));
 
         // SAFETY: memmove preconditions apply, less strict alignment.
         unsafe { __aeabi_memmove(dst, src, n) };
@@ -236,7 +236,7 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(target_vendor = "apple"))]
     pub unsafe extern "aapcs" fn __aeabi_memset8(dst: *mut u8, n: usize, c: i32) {
-        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
 
         // SAFETY: memset preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, c) };
@@ -261,7 +261,7 @@ intrinsics! {
     /// four bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memclr4(dst: *mut u8, n: usize) {
-        debug_assert!(dst.addr() & 3 == 0);
+        debug_assert!(dst.addr().is_multiple_of(4));
 
         // SAFETY: memclr preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, 0) };
@@ -275,7 +275,7 @@ intrinsics! {
     /// eight bytes.
     #[cfg(not(any(target_vendor = "apple", target_env = "msvc")))]
     pub unsafe extern "aapcs" fn __aeabi_memclr8(dst: *mut u8, n: usize) {
-        debug_assert!(dst.addr() & 7 == 0);
+        debug_assert!(dst.addr().is_multiple_of(8));
 
         // SAFETY: memclr preconditions apply, less strict alignment.
         unsafe { __aeabi_memset4(dst, n, 0) };

From 64b37a871033a0232cd062ffba3a0a77c3263817 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sat, 14 Jun 2025 20:17:23 +0200
Subject: [PATCH 076/133] use `extern "custom"` on naked functions with a
 custom calling convention

---
 compiler-builtins/src/aarch64.rs    |  2 +-
 compiler-builtins/src/arm.rs        | 13 ++++++-------
 compiler-builtins/src/int/udiv.rs   |  2 +-
 compiler-builtins/src/lib.rs        |  1 +
 compiler-builtins/src/probestack.rs | 15 +++------------
 compiler-builtins/src/x86.rs        | 10 +++++-----
 compiler-builtins/src/x86_64.rs     |  4 ++--
 7 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs
index 80392187c..a72b30d29 100644
--- a/compiler-builtins/src/aarch64.rs
+++ b/compiler-builtins/src/aarch64.rs
@@ -5,7 +5,7 @@ use core::intrinsics;
 intrinsics! {
     #[unsafe(naked)]
     #[cfg(all(target_os = "uefi", not(feature = "no-asm")))]
-    pub unsafe extern "C" fn __chkstk() {
+    pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
             ".p2align 2",
             "lsl    x16, x15, #4",
diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index 617cc8e50..fbec93ca4 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -9,11 +9,10 @@ unsafe extern "C" {
 }
 
 // SAFETY: these are defined in compiler-builtins
-// FIXME(extern_custom), this isn't always the correct ABI
-unsafe extern "aapcs" {
+unsafe extern "custom" {
     // AAPCS is not always the correct ABI for these intrinsics, but we only use this to
     // forward another `__aeabi_` call so it doesn't matter.
-    fn __aeabi_idiv(a: i32, b: i32) -> i32;
+    fn __aeabi_idiv();
 }
 
 intrinsics! {
@@ -21,7 +20,7 @@ intrinsics! {
     // custom calling convention which can't be implemented using a normal Rust function.
     #[unsafe(naked)]
     #[cfg(not(target_env = "msvc"))]
-    pub unsafe extern "C" fn __aeabi_uidivmod() {
+    pub unsafe extern "custom" fn __aeabi_uidivmod() {
         core::arch::naked_asm!(
             "push {{lr}}",
             "sub sp, sp, #4",
@@ -35,7 +34,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_uldivmod() {
+    pub unsafe extern "custom" fn __aeabi_uldivmod() {
         core::arch::naked_asm!(
             "push {{r4, lr}}",
             "sub sp, sp, #16",
@@ -51,7 +50,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_idivmod() {
+    pub unsafe extern "custom" fn __aeabi_idivmod() {
         core::arch::naked_asm!(
             "push {{r0, r1, r4, lr}}",
             "bl {trampoline}",
@@ -64,7 +63,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __aeabi_ldivmod() {
+    pub unsafe extern "custom" fn __aeabi_ldivmod() {
         core::arch::naked_asm!(
             "push {{r4, lr}}",
             "sub sp, sp, #16",
diff --git a/compiler-builtins/src/int/udiv.rs b/compiler-builtins/src/int/udiv.rs
index b9dee63c4..017a81ac9 100644
--- a/compiler-builtins/src/int/udiv.rs
+++ b/compiler-builtins/src/int/udiv.rs
@@ -44,7 +44,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    pub unsafe extern "C" fn __udivmodqi4() {
+    pub unsafe extern "custom" fn __udivmodqi4() {
         // compute unsigned 8-bit `n / d` and `n % d`.
         //
         // Note: GCC implements a [non-standard calling convention](https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention) for this function.
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index 1cec39d8b..dd9920cae 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -1,5 +1,6 @@
 #![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
 #![cfg_attr(all(target_family = "wasm"), feature(wasm_numeric_instr))]
+#![feature(abi_custom)]
 #![feature(abi_unadjusted)]
 #![feature(asm_experimental_arch)]
 #![feature(cfg_target_has_atomic)]
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index 1441fd73b..1d0010842 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -52,15 +52,12 @@
 // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
 // ensuring that if any pages are unmapped we'll make a page fault.
 //
-// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
-// it does not actually match `extern "C"`.
-//
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
 #[cfg(target_arch = "x86_64")]
 #[unsafe(naked)]
 #[rustc_std_internal_symbol]
-pub unsafe extern "C" fn __rust_probestack() {
+pub unsafe extern "custom" fn __rust_probestack() {
     #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))]
     macro_rules! ret {
         () => {
@@ -144,13 +141,10 @@ pub unsafe extern "C" fn __rust_probestack() {
 // that on Unix we're expected to restore everything as it was, this
 // function basically can't tamper with anything.
 //
-// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
-// it does not actually match `extern "C"`.
-//
 // The ABI here is the same as x86_64, except everything is 32-bits large.
 #[unsafe(naked)]
 #[rustc_std_internal_symbol]
-pub unsafe extern "C" fn __rust_probestack() {
+pub unsafe extern "custom" fn __rust_probestack() {
     core::arch::naked_asm!(
         "
             .cfi_startproc
@@ -192,9 +186,6 @@ pub unsafe extern "C" fn __rust_probestack() {
 // probestack function will also do things like _chkstk in MSVC.
 // So we need to sub %ax %sp in probestack when arch is x86.
 //
-// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
-// it does not actually match `extern "C"`.
-//
 // REF: Rust commit(74e80468347)
 // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
 // Comments in LLVM:
@@ -203,7 +194,7 @@ pub unsafe extern "C" fn __rust_probestack() {
 //   themselves.
 #[unsafe(naked)]
 #[rustc_std_internal_symbol]
-pub unsafe extern "C" fn __rust_probestack() {
+pub unsafe extern "custom" fn __rust_probestack() {
     core::arch::naked_asm!(
         "
             .cfi_startproc
diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs
index 01152d9c7..16e50922a 100644
--- a/compiler-builtins/src/x86.rs
+++ b/compiler-builtins/src/x86.rs
@@ -2,7 +2,7 @@
 
 use core::intrinsics;
 
-// NOTE These functions are implemented using assembly because they using a custom
+// NOTE These functions are implemented using assembly because they use a custom
 // calling convention which can't be implemented using a normal Rust function
 
 // NOTE These functions are never mangled as they are not tested against compiler-rt
@@ -13,10 +13,10 @@ intrinsics! {
         any(all(windows, target_env = "gnu"), target_os = "uefi"),
         not(feature = "no-asm")
     ))]
-    pub unsafe extern "C" fn __chkstk() {
+    pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
-            "jmp __alloca", // Jump to __alloca since fallthrough may be unreliable"
-            options(att_syntax)
+            "jmp {}", // Jump to __alloca since fallthrough may be unreliable"
+            sym crate::x86::_alloca::_alloca,
         );
     }
 
@@ -25,7 +25,7 @@ intrinsics! {
         any(all(windows, target_env = "gnu"), target_os = "uefi"),
         not(feature = "no-asm")
     ))]
-    pub unsafe extern "C" fn _alloca() {
+    pub unsafe extern "custom" fn _alloca() {
         // __chkstk and _alloca are the same function
         core::arch::naked_asm!(
             "push   %ecx",
diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs
index fc1190f79..9b7133b48 100644
--- a/compiler-builtins/src/x86_64.rs
+++ b/compiler-builtins/src/x86_64.rs
@@ -2,7 +2,7 @@
 
 use core::intrinsics;
 
-// NOTE These functions are implemented using assembly because they using a custom
+// NOTE These functions are implemented using assembly because they use a custom
 // calling convention which can't be implemented using a normal Rust function
 
 // NOTE These functions are never mangled as they are not tested against compiler-rt
@@ -17,7 +17,7 @@ intrinsics! {
         ),
         not(feature = "no-asm")
     ))]
-    pub unsafe extern "C" fn ___chkstk_ms() {
+    pub unsafe extern "custom" fn ___chkstk_ms() {
         core::arch::naked_asm!(
             "push   %rcx",
             "push   %rax",

From 41b5e34cd9b265ea0ee627599c0b3544cb4ece26 Mon Sep 17 00:00:00 2001
From: Urgau <3616612+Urgau@users.noreply.github.com>
Date: Sun, 15 Jun 2025 00:43:17 +0200
Subject: [PATCH 077/133] Add minimal triagebot config

This PR adds a minimal `triagebot.toml` config to make contributions to
this repository respect upstream rust-lang/rust conventions and avoid
issues when syncing this subtree.
---
 triagebot.toml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 triagebot.toml

diff --git a/triagebot.toml b/triagebot.toml
new file mode 100644
index 000000000..ecc05da01
--- /dev/null
+++ b/triagebot.toml
@@ -0,0 +1,21 @@
+## See <https://forge.rust-lang.org/triagebot/index.html> for documentation
+## of these features.
+
+# Warns when a PR contains merge commits
+# Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html
+[no-merges]
+exclude_titles = ["Update from"]
+
+# Canonicalize issue numbers to avoid closing the wrong issue
+# when commits are included in subtrees, as well as warning links in commits.
+# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html
+[issue-links]
+check-commits = false
+
+# Prevents mentions in commits to avoid users being spammed
+# Documentation at: https://forge.rust-lang.org/triagebot/no-mentions.html
+[no-mentions]
+
+# Enable issue transfers within the org
+# Documentation at: https://forge.rust-lang.org/triagebot/transfer.html
+[transfer]

From 267ae1fa43785448bfb0aebafc4e352c936dd4cf Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 14 Jun 2025 23:39:24 +0000
Subject: [PATCH 078/133] symcheck: Add a wrapper around an archive

Rather than re-opening the archive file for each check, add a wrapper
that keeps the data in memory. Additionally, collect the `--target`
argument so it can be used within this crate.
---
 crates/symbol-check/src/main.rs | 104 ++++++++++++++++++++++++--------
 1 file changed, 80 insertions(+), 24 deletions(-)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index d83cd318d..843a943fb 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -8,7 +8,9 @@ use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 
 use object::read::archive::{ArchiveFile, ArchiveMember};
-use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
+use object::{
+    File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection,
+};
 use serde_json::Value;
 
 const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
@@ -28,13 +30,11 @@ fn main() {
     let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
 
     match &args_ref[1..] {
-        ["build-and-check", rest @ ..] if !rest.is_empty() => {
-            let paths = exec_cargo_with_args(rest);
-            for path in paths {
-                println!("Checking {}", path.display());
-                verify_no_duplicates(&path);
-                verify_core_symbols(&path);
-            }
+        ["build-and-check", "--target", target, args @ ..] if !args.is_empty() => {
+            run_build_and_check(Some(target), args);
+        }
+        ["build-and-check", args @ ..] if !args.is_empty() => {
+            run_build_and_check(None, args);
         }
         _ => {
             println!("{USAGE}");
@@ -43,12 +43,42 @@ fn main() {
     }
 }
 
+fn run_build_and_check(target: Option<&str>, args: &[&str]) {
+    let paths = exec_cargo_with_args(target, args);
+    for path in paths {
+        println!("Checking {}", path.display());
+        let archive = Archive::from_path(&path);
+
+        verify_no_duplicates(&archive);
+        verify_core_symbols(&archive);
+    }
+}
+
+fn host_target() -> String {
+    let out = Command::new("rustc")
+        .arg("--version")
+        .arg("--verbose")
+        .output()
+        .unwrap();
+    assert!(out.status.success());
+    let out = String::from_utf8(out.stdout).unwrap();
+    out.lines()
+        .find_map(|s| s.strip_prefix("host: "))
+        .unwrap()
+        .to_owned()
+}
+
 /// Run `cargo build` with the provided additional arguments, collecting the list of created
 /// libraries.
-fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
+fn exec_cargo_with_args(target: Option<&str>, args: &[&str]) -> Vec<PathBuf> {
+    let mut host = String::new();
+    let target = target.unwrap_or_else(|| {
+        host = host_target();
+        host.as_str()
+    });
+
     let mut cmd = Command::new("cargo");
-    cmd.arg("build")
-        .arg("--message-format=json")
+    cmd.args(["build", "--target", target, "--message-format=json"])
         .args(args)
         .stdout(Stdio::piped());
 
@@ -133,12 +163,12 @@ impl SymInfo {
 /// Note that this will also locate cases where a symbol is weakly defined in more than one place.
 /// Technically there are no linker errors that will come from this, but it keeps our binary more
 /// straightforward and saves some distribution size.
-fn verify_no_duplicates(path: &Path) {
+fn verify_no_duplicates(archive: &Archive) {
     let mut syms = BTreeMap::<String, SymInfo>::new();
     let mut dups = Vec::new();
     let mut found_any = false;
 
-    for_each_symbol(path, |symbol, member| {
+    archive.for_each_symbol(|symbol, member| {
         // Only check defined globals
         if !symbol.is_global() || symbol.is_undefined() {
             return;
@@ -185,12 +215,12 @@ fn verify_no_duplicates(path: &Path) {
 }
 
 /// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
-fn verify_core_symbols(path: &Path) {
+fn verify_core_symbols(archive: &Archive) {
     let mut defined = BTreeSet::new();
     let mut undefined = Vec::new();
     let mut has_symbols = false;
 
-    for_each_symbol(path, |symbol, member| {
+    archive.for_each_symbol(|symbol, member| {
         has_symbols = true;
 
         // Find only symbols from `core`
@@ -219,14 +249,40 @@ fn verify_core_symbols(path: &Path) {
     println!("    success: no undefined references to core found");
 }
 
-/// For a given archive path, do something with each symbol.
-fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
-    let data = fs::read(path).expect("reading file failed");
-    let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
-    for member in archive.members() {
-        let member = member.expect("failed to access member");
-        let obj_data = member.data(&*data).expect("failed to access object");
-        let obj = object::File::parse(obj_data).expect("failed to parse object");
-        obj.symbols().for_each(|sym| f(sym, &member));
+/// Thin wrapper for owning data used by `object`.
+struct Archive {
+    data: Vec<u8>,
+}
+
+impl Archive {
+    fn from_path(path: &Path) -> Self {
+        Self {
+            data: fs::read(path).expect("reading file failed"),
+        }
+    }
+
+    fn file(&self) -> ArchiveFile<'_> {
+        ArchiveFile::parse(self.data.as_slice()).expect("archive parse failed")
+    }
+
+    /// For a given archive, do something with each object file.
+    fn for_each_object(&self, mut f: impl FnMut(ObjFile, &ArchiveMember)) {
+        let archive = self.file();
+
+        for member in archive.members() {
+            let member = member.expect("failed to access member");
+            let obj_data = member
+                .data(self.data.as_slice())
+                .expect("failed to access object");
+            let obj = ObjFile::parse(obj_data).expect("failed to parse object");
+            f(obj, &member);
+        }
+    }
+
+    /// For a given archive, do something with each symbol.
+    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+        self.for_each_object(|obj, member| {
+            obj.symbols().for_each(|sym| f(sym, member));
+        });
     }
 }

From 674910e0fa6f0fb2cc055f4f7051ff0eb53c7735 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Tue, 24 Jun 2025 20:40:08 +0200
Subject: [PATCH 079/133] Use `asm_cfg` in `probestack`

cc https://www.github.com/rust-lang/rust/issues/140364
---
 compiler-builtins/src/lib.rs        |  1 +
 compiler-builtins/src/probestack.rs | 35 ++++++++++-------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index dd9920cae..fe0ad81dd 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -7,6 +7,7 @@
 #![feature(compiler_builtins)]
 #![feature(core_intrinsics)]
 #![feature(linkage)]
+#![feature(asm_cfg)]
 #![feature(naked_functions)]
 #![feature(repr_simd)]
 #![feature(macro_metavar_expr_concat)]
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index 1d0010842..f4105dde5 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -58,27 +58,6 @@
 #[unsafe(naked)]
 #[rustc_std_internal_symbol]
 pub unsafe extern "custom" fn __rust_probestack() {
-    #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))]
-    macro_rules! ret {
-        () => {
-            "ret"
-        };
-    }
-
-    #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))]
-    macro_rules! ret {
-        // for this target, [manually patch for LVI].
-        //
-        // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
-        () => {
-            "
-            pop %r11
-            lfence
-            jmp *%r11
-            "
-        };
-    }
-
     core::arch::naked_asm!(
         "
             .cfi_startproc
@@ -128,8 +107,18 @@ pub unsafe extern "custom" fn __rust_probestack() {
             .cfi_def_cfa_register %rsp
             .cfi_adjust_cfa_offset -8
     ",
-        ret!(),
-        "
+    #[cfg(not(all(target_env = "sgx", target_vendor = "fortanix")))]
+    "       ret",
+    #[cfg(all(target_env = "sgx", target_vendor = "fortanix"))]
+    "
+            // for this target, [manually patch for LVI].
+            //
+            // [manually patch for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+            pop %r11
+            lfence
+            jmp *%r11
+    ",
+    "
             .cfi_endproc
     ",
         options(att_syntax)

From 0bbec7238890242e7754d6ff604c989666328d83 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Sun, 29 Jun 2025 06:53:07 +0300
Subject: [PATCH 080/133] apply suggestions for clippy::manual_is_multiple_of
 in libm-test

---
 libm-test/tests/z_extensive/run.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libm-test/tests/z_extensive/run.rs b/libm-test/tests/z_extensive/run.rs
index f2ba6a4a0..e04e00c6d 100644
--- a/libm-test/tests/z_extensive/run.rs
+++ b/libm-test/tests/z_extensive/run.rs
@@ -197,15 +197,15 @@ impl Progress {
 
     fn update(&self, completed: u64, input: impl fmt::Debug) {
         // Infrequently update the progress bar.
-        if completed % 20_000 == 0 {
+        if completed.is_multiple_of(20_000) {
             self.pb.set_position(completed);
         }
 
-        if completed % 500_000 == 0 {
+        if completed.is_multiple_of(500_000) {
             self.pb.set_message(format!("input: {input:<24?}"));
         }
 
-        if !self.is_tty && completed % 5_000_000 == 0 {
+        if !self.is_tty && completed.is_multiple_of(5_000_000) {
             let len = self.pb.length().unwrap_or_default();
             eprintln!(
                 "[{elapsed:3?}s {percent:3.0}%] {name} \

From cc53499ebbe3d65ba247bc8a2da7e5984c039906 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 14 Jun 2025 08:44:37 +0000
Subject: [PATCH 081/133] josh-sync: Replace `#xxxx`-style links in messages

Often our short summaries will pick up a Bors `Auto merge of #xxxx ...`
commit message. Replace these with something like `rust-lang/rust#1234`
to avoid broken links when going between repositories.
---
 crates/josh-sync/Cargo.toml  |  1 +
 crates/josh-sync/src/sync.rs | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/crates/josh-sync/Cargo.toml b/crates/josh-sync/Cargo.toml
index 1f3bb376d..8e2e891db 100644
--- a/crates/josh-sync/Cargo.toml
+++ b/crates/josh-sync/Cargo.toml
@@ -5,3 +5,4 @@ publish = false
 
 [dependencies]
 directories = "6.0.0"
+regex-lite = "0.1.6"
diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs
index 003cf187d..2d89d2d1c 100644
--- a/crates/josh-sync/src/sync.rs
+++ b/crates/josh-sync/src/sync.rs
@@ -1,8 +1,11 @@
+use std::borrow::Cow;
 use std::net::{SocketAddr, TcpStream};
 use std::process::{Command, Stdio, exit};
 use std::time::Duration;
 use std::{env, fs, process, thread};
 
+use regex_lite::Regex;
+
 const JOSH_PORT: u16 = 42042;
 const DEFAULT_PR_BRANCH: &str = "update-builtins";
 
@@ -77,6 +80,7 @@ impl GitSync {
             "--depth=1",
         ]);
         let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]);
+        let new_summary = replace_references(&new_summary, &self.upstream_repo);
 
         // Update rust-version file. As a separate commit, since making it part of
         // the merge has confused the heck out of josh in the past.
@@ -297,6 +301,13 @@ fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) ->
     String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output")
 }
 
+/// Replace `#1234`-style issue/PR references with `repo#1234` to ensure links work across
+/// repositories.
+fn replace_references<'a>(s: &'a str, repo: &str) -> Cow<'a, str> {
+    let re = Regex::new(r"\B(?P<id>#\d+)\b").unwrap();
+    re.replace(s, &format!("{repo}$id"))
+}
+
 /// Create a wrapper that stops Josh on drop.
 pub struct Josh(process::Child);
 
@@ -369,3 +380,22 @@ impl Drop for Josh {
         self.0.kill().expect("failed to SIGKILL josh-proxy");
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_replace() {
+        assert_eq!(replace_references("#1234", "r-l/rust"), "r-l/rust#1234");
+        assert_eq!(replace_references("#1234x", "r-l/rust"), "#1234x");
+        assert_eq!(
+            replace_references("merge #1234", "r-l/rust"),
+            "merge r-l/rust#1234"
+        );
+        assert_eq!(
+            replace_references("foo/bar#1234", "r-l/rust"),
+            "foo/bar#1234"
+        );
+    }
+}

From 95abb0e02db3128256297203507c8e1da9c96696 Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Tue, 1 Jul 2025 11:07:48 +0300
Subject: [PATCH 082/133] libm: Improved integer utilities, implement shifts
 and bug fixes for i256 and u256

`i256` and `u256`
- operators now use the same overflow convention as primitives
- implement `<<` and `-` (previously just `>>` and `+`)
- implement `Ord` correctly (the previous `PartialOrd` was broken)
- correct `i256::SIGNED` to `true`

The `Int`-trait is extended with `trailing_zeros`, `carrying_add`, and
`borrowing_sub`.
---
 libm-test/benches/icount.rs         |  18 +++-
 libm-test/tests/u256.rs             |  46 +++++++++-
 libm/src/math/support/big.rs        | 133 +++++++++++++++++-----------
 libm/src/math/support/big/tests.rs  |  63 ++++++++++++-
 libm/src/math/support/int_traits.rs |  23 ++++-
 5 files changed, 223 insertions(+), 60 deletions(-)

diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index a0928a29f..02ee13f80 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -119,6 +119,22 @@ fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
     }
 }
 
+#[library_benchmark]
+#[bench::linspace(setup_u256_add())]
+fn icount_bench_u256_sub(cases: Vec<(u256, u256)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) - black_box(y));
+    }
+}
+
+#[library_benchmark]
+#[bench::linspace(setup_u256_shift())]
+fn icount_bench_u256_shl(cases: Vec<(u256, u32)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) << black_box(y));
+    }
+}
+
 #[library_benchmark]
 #[bench::linspace(setup_u256_shift())]
 fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
@@ -129,7 +145,7 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 
 library_benchmark_group!(
     name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_shr
+    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
 );
 
 #[library_benchmark]
diff --git a/libm-test/tests/u256.rs b/libm-test/tests/u256.rs
index 8cbb3ad22..d1c5cfbcc 100644
--- a/libm-test/tests/u256.rs
+++ b/libm-test/tests/u256.rs
@@ -111,12 +111,54 @@ fn mp_u256_add() {
         let y = random_u256(&mut rng);
         assign_bigint(&mut bx, x);
         assign_bigint(&mut by, y);
-        let actual = x + y;
+        let actual = if u256::MAX - x >= y {
+            x + y
+        } else {
+            // otherwise (u256::MAX - x) < y, so the wrapped result is
+            // (x + y) - (u256::MAX + 1) == y - (u256::MAX - x) - 1
+            y - (u256::MAX - x) - 1_u128.widen()
+        };
         bx += &by;
         check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
     }
 }
 
+#[test]
+fn mp_u256_sub() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint(&mut bx, x);
+        assign_bigint(&mut by, y);
+
+        // since the operators (may) panic on overflow,
+        // we should test something that doesn't
+        let actual = if x >= y { x - y } else { y - x };
+        bx -= &by;
+        bx.abs_mut();
+        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_u256_shl() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let shift: u32 = rng.random_range(0..256);
+        assign_bigint(&mut bx, x);
+        let actual = x << shift;
+        bx <<= shift;
+        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+    }
+}
+
 #[test]
 fn mp_u256_shr() {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
@@ -124,7 +166,7 @@ fn mp_u256_shr() {
 
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
-        let shift: u32 = rng.random_range(0..255);
+        let shift: u32 = rng.random_range(0..256);
         assign_bigint(&mut bx, x);
         let actual = x >> shift;
         bx >>= shift;
diff --git a/libm/src/math/support/big.rs b/libm/src/math/support/big.rs
index 8a52d86cc..b7f128542 100644
--- a/libm/src/math/support/big.rs
+++ b/libm/src/math/support/big.rs
@@ -11,10 +11,10 @@ const U128_LO_MASK: u128 = u64::MAX as u128;
 
 /// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct u256 {
-    pub lo: u128,
     pub hi: u128,
+    pub lo: u128,
 }
 
 impl u256 {
@@ -28,17 +28,17 @@ impl u256 {
     pub fn signed(self) -> i256 {
         i256 {
             lo: self.lo,
-            hi: self.hi,
+            hi: self.hi as i128,
         }
     }
 }
 
 /// A 256-bit signed integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
 pub struct i256 {
+    pub hi: i128,
     pub lo: u128,
-    pub hi: u128,
 }
 
 impl i256 {
@@ -47,7 +47,7 @@ impl i256 {
     pub fn unsigned(self) -> u256 {
         u256 {
             lo: self.lo,
-            hi: self.hi,
+            hi: self.hi as u128,
         }
     }
 }
@@ -73,17 +73,17 @@ impl MinInt for i256 {
 
     type Unsigned = u256;
 
-    const SIGNED: bool = false;
+    const SIGNED: bool = true;
     const BITS: u32 = 256;
     const ZERO: Self = Self { lo: 0, hi: 0 };
     const ONE: Self = Self { lo: 1, hi: 0 };
     const MIN: Self = Self {
-        lo: 0,
-        hi: 1 << 127,
+        lo: u128::MIN,
+        hi: i128::MIN,
     };
     const MAX: Self = Self {
         lo: u128::MAX,
-        hi: u128::MAX >> 1,
+        hi: i128::MAX,
     };
 }
 
@@ -109,60 +109,86 @@ macro_rules! impl_common {
             }
         }
 
-        impl ops::Shl<u32> for $ty {
+        impl ops::Add<Self> for $ty {
             type Output = Self;
 
-            fn shl(self, _rhs: u32) -> Self::Output {
-                unimplemented!("only used to meet trait bounds")
+            fn add(self, rhs: Self) -> Self::Output {
+                let (lo, carry) = self.lo.overflowing_add(rhs.lo);
+                let (hi, of) = Int::carrying_add(self.hi, rhs.hi, carry);
+                debug_assert!(!of, "attempt to add with overflow");
+                Self { lo, hi }
             }
         }
-    };
-}
 
-impl_common!(i256);
-impl_common!(u256);
+        impl ops::Sub<Self> for $ty {
+            type Output = Self;
 
-impl ops::Add<Self> for u256 {
-    type Output = Self;
+            fn sub(self, rhs: Self) -> Self::Output {
+                let (lo, borrow) = self.lo.overflowing_sub(rhs.lo);
+                let (hi, of) = Int::borrowing_sub(self.hi, rhs.hi, borrow);
+                debug_assert!(!of, "attempt to subtract with overflow");
+                Self { lo, hi }
+            }
+        }
 
-    fn add(self, rhs: Self) -> Self::Output {
-        let (lo, carry) = self.lo.overflowing_add(rhs.lo);
-        let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi);
+        impl ops::Shl<u32> for $ty {
+            type Output = Self;
 
-        Self { lo, hi }
-    }
-}
+            fn shl(mut self, rhs: u32) -> Self::Output {
+                debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
 
-impl ops::Shr<u32> for u256 {
-    type Output = Self;
+                let half_bits = Self::BITS / 2;
+                let low_mask = half_bits - 1;
+                let s = rhs & low_mask;
 
-    fn shr(mut self, rhs: u32) -> Self::Output {
-        debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
-        if rhs >= Self::BITS {
-            return Self::ZERO;
-        }
+                let lo = self.lo;
+                let hi = self.hi;
 
-        if rhs == 0 {
-            return self;
-        }
+                self.lo = lo << s;
 
-        if rhs < 128 {
-            self.lo >>= rhs;
-            self.lo |= self.hi << (128 - rhs);
-        } else {
-            self.lo = self.hi >> (rhs - 128);
+                if rhs & half_bits == 0 {
+                    self.hi = (lo >> (low_mask ^ s) >> 1) as _;
+                    self.hi |= hi << s;
+                } else {
+                    self.hi = self.lo as _;
+                    self.lo = 0;
+                }
+                self
+            }
         }
 
-        if rhs < 128 {
-            self.hi >>= rhs;
-        } else {
-            self.hi = 0;
-        }
+        impl ops::Shr<u32> for $ty {
+            type Output = Self;
 
-        self
-    }
+            fn shr(mut self, rhs: u32) -> Self::Output {
+                debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
+
+                let half_bits = Self::BITS / 2;
+                let low_mask = half_bits - 1;
+                let s = rhs & low_mask;
+
+                let lo = self.lo;
+                let hi = self.hi;
+
+                self.hi = hi >> s;
+
+                #[allow(unused_comparisons)]
+                if rhs & half_bits == 0 {
+                    self.lo = (hi << (low_mask ^ s) << 1) as _;
+                    self.lo |= lo >> s;
+                } else {
+                    self.lo = self.hi as _;
+                    self.hi = if hi < 0 { !0 } else { 0 };
+                }
+                self
+            }
+        }
+    };
 }
 
+impl_common!(i256);
+impl_common!(u256);
+
 impl HInt for u128 {
     type D = u256;
 
@@ -200,7 +226,7 @@ impl HInt for u128 {
     }
 
     fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
+        u256 { lo: 0, hi: self }
     }
 }
 
@@ -208,11 +234,10 @@ impl HInt for i128 {
     type D = i256;
 
     fn widen(self) -> Self::D {
-        let mut ret = self.unsigned().zero_widen().signed();
-        if self.is_negative() {
-            ret.hi = u128::MAX;
+        i256 {
+            lo: self as u128,
+            hi: if self < 0 { -1 } else { 0 },
         }
-        ret
     }
 
     fn zero_widen(self) -> Self::D {
@@ -228,7 +253,7 @@ impl HInt for i128 {
     }
 
     fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
+        i256 { lo: 0, hi: self }
     }
 }
 
@@ -252,6 +277,6 @@ impl DInt for i256 {
     }
 
     fn hi(self) -> Self::H {
-        self.hi as i128
+        self.hi
     }
 }
diff --git a/libm/src/math/support/big/tests.rs b/libm/src/math/support/big/tests.rs
index d2010f021..d54706c72 100644
--- a/libm/src/math/support/big/tests.rs
+++ b/libm/src/math/support/big/tests.rs
@@ -36,7 +36,7 @@ fn widen_i128() {
         (LOHI_SPLIT as i128).widen(),
         i256 {
             lo: LOHI_SPLIT,
-            hi: u128::MAX
+            hi: -1,
         }
     );
     assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
@@ -275,3 +275,64 @@ fn shr_u256_overflow() {
     assert_eq!(u256::MAX >> 257, u256::ZERO);
     assert_eq!(u256::MAX >> u32::MAX, u256::ZERO);
 }
+
+#[test]
+fn u256_ord() {
+    let _1 = u256::ONE;
+    let _2 = _1 + _1;
+    for x in u8::MIN..u8::MAX {
+        let y = x + 1;
+        let wx = (x as u128).widen_hi();
+        let wy = (y as u128).widen_hi();
+        assert!([wx, wx + _1, wx + _2, wy, wy + _1, wy + _2].is_sorted());
+    }
+}
+#[test]
+fn i256_ord() {
+    let _1 = i256::ONE;
+    let _2 = _1 + _1;
+    for x in i8::MIN..i8::MAX {
+        let y = x + 1;
+        let wx = (x as i128).widen_hi();
+        let wy = (y as i128).widen_hi();
+        assert!([wx, wx + _1, wx + _2, wy - _2, wy - _1, wy].is_sorted());
+    }
+}
+
+#[test]
+fn u256_shifts() {
+    let _1 = u256::ONE;
+    for k in 0..255 {
+        let x = _1 << k;
+        let x2 = _1 << (k + 1);
+        assert!(x < x2);
+        assert_eq!(x << 1, x2);
+        assert_eq!(x + x, x2);
+        assert_eq!(x >> k, _1);
+        assert_eq!(x2 >> (k + 1), _1);
+    }
+}
+#[test]
+fn i256_shifts() {
+    let _1 = i256::ONE;
+    for k in 0..254 {
+        let x = _1 << k;
+        let x2 = _1 << (k + 1);
+        assert!(x < x2);
+        assert_eq!(x << 1, x2);
+        assert_eq!(x + x, x2);
+        assert_eq!(x >> k, _1);
+        assert_eq!(x2 >> (k + 1), _1);
+    }
+
+    let min = _1 << 255;
+    assert_eq!(min, i256::MIN);
+    let mut x = min;
+    for k in 0..255 {
+        assert_eq!(x, min >> k);
+        let y = x >> 1;
+        assert_eq!(y + y, x);
+        assert!(x < y);
+        x = y;
+    }
+}
diff --git a/libm/src/math/support/int_traits.rs b/libm/src/math/support/int_traits.rs
index 9b29e2f45..9d8826dfe 100644
--- a/libm/src/math/support/int_traits.rs
+++ b/libm/src/math/support/int_traits.rs
@@ -37,8 +37,6 @@ pub trait Int:
     + fmt::Display
     + fmt::Binary
     + fmt::LowerHex
-    + PartialEq
-    + PartialOrd
     + ops::AddAssign
     + ops::SubAssign
     + ops::MulAssign
@@ -102,7 +100,10 @@ pub trait Int:
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
     fn overflowing_sub(self, other: Self) -> (Self, bool);
+    fn carrying_add(self, other: Self, carry: bool) -> (Self, bool);
+    fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool);
     fn leading_zeros(self) -> u32;
+    fn trailing_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
 
@@ -168,12 +169,30 @@ macro_rules! int_impl_common {
             <Self>::leading_zeros(self)
         }
 
+        fn trailing_zeros(self) -> u32 {
+            <Self>::trailing_zeros(self)
+        }
+
         fn ilog2(self) -> u32 {
             // On our older MSRV, this resolves to the trait method. Which won't actually work,
             // but this is only called behind other gates.
             #[allow(clippy::incompatible_msrv)]
             <Self>::ilog2(self)
         }
+
+        fn carrying_add(self, other: Self, carry: bool) -> (Self, bool) {
+            let (ab, of1) = self.overflowing_add(other);
+            let (abc, of2) = ab.overflowing_add(Self::from_bool(carry));
+            // `of1 && of2` is possible with signed integers if a negative sum
+            // overflows to `MAX` and adding the carry overflows again back to `MIN`
+            (abc, of1 ^ of2)
+        }
+
+        fn borrowing_sub(self, other: Self, borrow: bool) -> (Self, bool) {
+            let (ab, of1) = self.overflowing_sub(other);
+            let (abc, of2) = ab.overflowing_sub(Self::from_bool(borrow));
+            (abc, of1 ^ of2)
+        }
     };
 }
 

From ed17b95715ddce362fdea2c787e6efb28824f29c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 30 Jun 2025 16:53:21 -0500
Subject: [PATCH 083/133] Use the compiler to determine whether or not to
 enable `f16` and `f128`

Currently we decide whether or not to build and test `f16` and `f128`
support mostly based on the target triple. This isn't always accurate,
however, since support also varies by backend and the backend version.

Since recently, `rustc` is aware of this with the unstable config option
`target_has_reliable_{f16,f128}`, which better represents when the types
are actually expected to be available and usable. Switch our
compiler-builtins and libm configuration to use this by probing `rustc`
for the target's settings.

A few small `cfg` fixes are needed with this.
---
 builtins-test-intrinsics/build.rs  |  1 -
 builtins-test/benches/float_cmp.rs |  2 +
 builtins-test/build.rs             |  1 -
 builtins-test/tests/conv.rs        |  4 +-
 builtins-test/tests/div_rem.rs     |  4 +-
 compiler-builtins/build.rs         |  3 +-
 compiler-builtins/configure.rs     | 81 +++++++++++-------------------
 libm/configure.rs                  | 81 +++++++++++-------------------
 8 files changed, 66 insertions(+), 111 deletions(-)

diff --git a/builtins-test-intrinsics/build.rs b/builtins-test-intrinsics/build.rs
index 89b126ff2..b82581262 100644
--- a/builtins-test-intrinsics/build.rs
+++ b/builtins-test-intrinsics/build.rs
@@ -6,6 +6,5 @@ fn main() {
     println!("cargo::rerun-if-changed=../configure.rs");
 
     let target = builtins_configure::Target::from_env();
-    builtins_configure::configure_f16_f128(&target);
     builtins_configure::configure_aliases(&target);
 }
diff --git a/builtins-test/benches/float_cmp.rs b/builtins-test/benches/float_cmp.rs
index 87a89efb5..da29b5d31 100644
--- a/builtins-test/benches/float_cmp.rs
+++ b/builtins-test/benches/float_cmp.rs
@@ -177,6 +177,7 @@ float_bench! {
     ],
 }
 
+#[cfg(f128_enabled)]
 float_bench! {
     name: cmp_f128_gt,
     sig: (a: f128, b: f128) -> CmpResult,
@@ -189,6 +190,7 @@ float_bench! {
     asm: []
 }
 
+#[cfg(f128_enabled)]
 float_bench! {
     name: cmp_f128_unord,
     sig: (a: f128, b: f128) -> CmpResult,
diff --git a/builtins-test/build.rs b/builtins-test/build.rs
index e8f4eb4dd..5b2dcd12e 100644
--- a/builtins-test/build.rs
+++ b/builtins-test/build.rs
@@ -116,5 +116,4 @@ fn main() {
     }
 
     builtins_configure::configure_aliases(&target);
-    builtins_configure::configure_f16_f128(&target);
 }
diff --git a/builtins-test/tests/conv.rs b/builtins-test/tests/conv.rs
index 491915d9b..7d729364f 100644
--- a/builtins-test/tests/conv.rs
+++ b/builtins-test/tests/conv.rs
@@ -118,7 +118,7 @@ mod i_to_f {
         i128, __floattidf;
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
         u32, __floatunsitf;
@@ -129,7 +129,7 @@ mod i_to_f {
         i128, __floattitf;
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
         u32, __floatunsikf;
diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs
index 5ae653cc9..e8327f9b4 100644
--- a/builtins-test/tests/div_rem.rs
+++ b/builtins-test/tests/div_rem.rs
@@ -147,7 +147,7 @@ mod float_div {
         f64, __divdf3, Double, all();
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     float! {
         f128, __divtf3, Quad,
@@ -156,7 +156,7 @@ mod float_div {
         not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
     }
 
-    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float! {
         f128, __divkf3, Quad, not(feature = "no-sys-f128");
diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index 018899faf..8f51c12b5 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -2,7 +2,7 @@ mod configure;
 
 use std::env;
 
-use configure::{Target, configure_aliases, configure_f16_f128};
+use configure::{Target, configure_aliases};
 
 fn main() {
     println!("cargo::rerun-if-changed=build.rs");
@@ -12,7 +12,6 @@ fn main() {
     let cwd = env::current_dir().unwrap();
 
     configure_check_cfg();
-    configure_f16_f128(&target);
     configure_aliases(&target);
 
     configure_libm(&target);
diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs
index d825f35a9..a1e45080e 100644
--- a/compiler-builtins/configure.rs
+++ b/compiler-builtins/configure.rs
@@ -1,6 +1,7 @@
 // Configuration that is shared between `compiler_builtins` and `builtins_test`.
 
-use std::env;
+use std::process::{Command, Stdio};
+use std::{env, str};
 
 #[derive(Debug)]
 #[allow(dead_code)]
@@ -16,6 +17,8 @@ pub struct Target {
     pub pointer_width: u8,
     pub little_endian: bool,
     pub features: Vec<String>,
+    pub reliable_f128: bool,
+    pub reliable_f16: bool,
 }
 
 impl Target {
@@ -32,6 +35,19 @@ impl Target {
             .map(|s| s.to_lowercase().replace("_", "-"))
             .collect();
 
+        // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used
+        // to get consistent output regardless of channel (`f16`/`f128` config options are hidden
+        // on stable otherwise).
+        let mut cmd = Command::new(env::var("RUSTC").unwrap());
+        cmd.args(["--print=cfg", "--target", &triple])
+            .env("RUSTC_BOOTSTRAP", "1")
+            .stderr(Stdio::inherit());
+        let out = cmd
+            .output()
+            .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
+        assert!(out.status.success(), "failed to run `{cmd:?}`");
+        let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
+
         Self {
             triple,
             triple_split,
@@ -51,6 +67,8 @@ impl Target {
                 .split(",")
                 .map(ToOwned::to_owned)
                 .collect(),
+            reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"),
+            reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"),
         }
     }
 
@@ -74,63 +92,24 @@ pub fn configure_aliases(target: &Target) {
     if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" {
         println!("cargo:rustc-cfg=thumb_1")
     }
-}
-
-/// Configure whether or not `f16` and `f128` support should be enabled.
-pub fn configure_f16_f128(target: &Target) {
-    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
-    // that the backend will not crash when using these types and generates code that can be called
-    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
-    // ABI or other bugs.
-    //
-    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
-    // not straightforward.
-    //
-    // Original source of this list:
-    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
-    let f16_enabled = match target.arch.as_str() {
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
-        "s390x" => false,
-        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
-        "csky" => false,
-        "hexagon" => false,
-        "powerpc" | "powerpc64" => false,
-        "sparc" | "sparc64" => false,
-        "wasm32" | "wasm64" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
 
-    let f128_enabled = match target.arch.as_str() {
-        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
-        "amdgpu" => false,
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // FIXME(llvm20): fixed by <https://github.com/llvm/llvm-project/pull/117525>
-        "mips64" | "mips64r6" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
-        "nvptx64" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
-        "powerpc64" if &target.os == "aix" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
-        "sparc" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
+    /* Not all backends support `f16` and `f128` to the same level on all architectures, so we
+     * need to disable things if the compiler may crash. See configuration at:
+     * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432
+     * * https://github.com/rust-lang/rustc_codegen_gcc/blob/4b5c44b14166083eef8d71f15f5ea1f53fc976a0/src/lib.rs#L496-L507
+     * * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
+     */
 
-    // If the feature is set, disable these types.
-    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+    // If the feature is set, disable both of these types.
+    let no_f16_f128 = target.cargo_features.iter().any(|s| s == "no-f16-f128");
 
     println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
-    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
-
-    if f16_enabled && !disable_both {
+    if target.reliable_f16 && !no_f16_f128 {
         println!("cargo::rustc-cfg=f16_enabled");
     }
 
-    if f128_enabled && !disable_both {
+    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
+    if target.reliable_f128 && !no_f16_f128 {
         println!("cargo::rustc-cfg=f128_enabled");
     }
 }
diff --git a/libm/configure.rs b/libm/configure.rs
index 2a497c7b1..6562ecbe5 100644
--- a/libm/configure.rs
+++ b/libm/configure.rs
@@ -1,7 +1,8 @@
 // Configuration shared with both libm and libm-test
 
-use std::env;
 use std::path::PathBuf;
+use std::process::{Command, Stdio};
+use std::{env, str};
 
 #[allow(dead_code)]
 pub struct Config {
@@ -9,6 +10,7 @@ pub struct Config {
     pub out_dir: PathBuf,
     pub opt_level: String,
     pub cargo_features: Vec<String>,
+    pub target_triple: String,
     pub target_arch: String,
     pub target_env: String,
     pub target_family: Option<String>,
@@ -16,10 +18,13 @@ pub struct Config {
     pub target_string: String,
     pub target_vendor: String,
     pub target_features: Vec<String>,
+    pub reliable_f128: bool,
+    pub reliable_f16: bool,
 }
 
 impl Config {
     pub fn from_env() -> Self {
+        let target_triple = env::var("TARGET").unwrap();
         let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
@@ -28,7 +33,21 @@ impl Config {
             .map(|s| s.to_lowercase().replace("_", "-"))
             .collect();
 
+        // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used
+        // to get consistent output regardless of channel (`f16`/`f128` config options are hidden
+        // on stable otherwise).
+        let mut cmd = Command::new(env::var("RUSTC").unwrap());
+        cmd.args(["--print=cfg", "--target", &target_triple])
+            .env("RUSTC_BOOTSTRAP", "1")
+            .stderr(Stdio::inherit());
+        let out = cmd
+            .output()
+            .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
+        assert!(out.status.success(), "failed to run `{cmd:?}`");
+        let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
+
         Self {
+            target_triple,
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
             out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
             opt_level: env::var("OPT_LEVEL").unwrap(),
@@ -40,6 +59,8 @@ impl Config {
             target_string: env::var("TARGET").unwrap(),
             target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
             target_features,
+            reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"),
+            reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"),
         }
     }
 }
@@ -128,62 +149,18 @@ fn emit_f16_f128_cfg(cfg: &Config) {
         return;
     }
 
-    // Set whether or not `f16` and `f128` are supported at a basic level by LLVM. This only means
-    // that the backend will not crash when using these types and generates code that can be called
-    // without crashing (no infinite recursion). This does not mean that the platform doesn't have
-    // ABI or other bugs.
-    //
-    // We do this here rather than in `rust-lang/rust` because configuring via cargo features is
-    // not straightforward.
-    //
-    // Original source of this list:
-    // <https://github.com/rust-lang/compiler-builtins/pull/652#issuecomment-2266151350>
-    let f16_enabled = match cfg.target_arch.as_str() {
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
-        "s390x" => false,
-        // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981>
-        // FIXME(llvm): loongarch fixed by <https://github.com/llvm/llvm-project/pull/107791>
-        "csky" => false,
-        "hexagon" => false,
-        "loongarch64" => false,
-        "mips" | "mips64" | "mips32r6" | "mips64r6" => false,
-        "powerpc" | "powerpc64" => false,
-        "sparc" | "sparc64" => false,
-        "wasm32" | "wasm64" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
-
-    let f128_enabled = match cfg.target_arch.as_str() {
-        // Unsupported (libcall is not supported) <https://github.com/llvm/llvm-project/issues/121122>
-        "amdgpu" => false,
-        // Unsupported <https://github.com/llvm/llvm-project/issues/94434>
-        "arm64ec" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/96432>
-        "mips64" | "mips64r6" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/95471>
-        "nvptx64" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/101545>
-        "powerpc64" if &cfg.target_os == "aix" => false,
-        // Selection failure <https://github.com/llvm/llvm-project/issues/41838>
-        "sparc" => false,
-        // Most everything else works as of LLVM 19
-        _ => true,
-    };
-
-    // If the feature is set, disable these types.
-    let disable_both = env::var_os("CARGO_FEATURE_NO_F16_F128").is_some();
+    /* See the compiler-builtins configure file for info about the meaning of these options */
 
-    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+    // If the feature is set, disable both of these types.
+    let no_f16_f128 = cfg.cargo_features.iter().any(|s| s == "no-f16-f128");
 
-    if f16_enabled && !disable_both {
+    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
+    if cfg.reliable_f16 && !no_f16_f128 {
         println!("cargo:rustc-cfg=f16_enabled");
     }
 
-    if f128_enabled && !disable_both {
+    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
+    if cfg.reliable_f128 && !no_f16_f128 {
         println!("cargo:rustc-cfg=f128_enabled");
     }
 }

From 245c676b8e87b50651ebd79847c6e42d3c091824 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 4 Jul 2025 16:53:19 -0500
Subject: [PATCH 084/133] Remove the `let_chains` feature now that it is stable

---
 crates/libm-macros/src/lib.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/crates/libm-macros/src/lib.rs b/crates/libm-macros/src/lib.rs
index 482da974c..7efa1488f 100644
--- a/crates/libm-macros/src/lib.rs
+++ b/crates/libm-macros/src/lib.rs
@@ -1,5 +1,3 @@
-#![feature(let_chains)]
-
 mod enums;
 mod parse;
 mod shared;

From 56aed1d51810830908c635cea2377fb5159e1ab5 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 4 Jul 2025 17:03:45 -0500
Subject: [PATCH 085/133] symcheck: Make `target` a positional argument

This makes it more obvious what we intend to check rather than looking
for `--target`.
---
 ci/run.sh                       | 27 +++++++++++-------------
 crates/symbol-check/src/main.rs | 37 ++++++++++++++++++++-------------
 2 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 27b9686ea..8b7965bb2 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -54,29 +54,26 @@ symcheck=(cargo run -p symbol-check --release)
 [[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
 symcheck+=(-- build-and-check)
 
-"${symcheck[@]}" -p compiler_builtins --target "$target"
-"${symcheck[@]}" -p compiler_builtins --target "$target" --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
-"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins
+"${symcheck[@]}" "$target" -- -p compiler_builtins --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features c
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features c --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-asm --release
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128
+"${symcheck[@]}" "$target" -- -p compiler_builtins --features no-f16-f128 --release
 
 run_intrinsics_test() {
-    args=(
-        --target "$target" --verbose \
-        --manifest-path builtins-test-intrinsics/Cargo.toml
-    )
-    args+=( "$@" )
+    build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml)
+    build_args+=("$@")
 
     # symcheck also checks the results of builtins-test-intrinsics
-    "${symcheck[@]}" "${args[@]}"
+    "${symcheck[@]}" "$target" -- "${build_args[@]}"
 
     # FIXME: we get access violations on Windows, our entrypoint may need to
     # be tweaked.
     if [ "${BUILD_ONLY:-}" != "1" ] && ! [[ "$target" = *"windows"* ]]; then
-        cargo run "${args[@]}"
+        cargo run --target "$target" "${build_args[@]}"
     fi
 }
 
diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index 843a943fb..f60d4f0d3 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -18,10 +18,12 @@ const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe")
 
 const USAGE: &str = "Usage:
 
-    symbol-check build-and-check CARGO_ARGS ...
+    symbol-check build-and-check [TARGET] -- CARGO_BUILD_ARGS ...
 
-Cargo will get invoked with `CARGO_ARGS` and all output
+Cargo will get invoked with `CARGO_ARGS` and the specified target. All output
 `compiler_builtins*.rlib` files will be checked.
+
+If TARGET is not specified, the host target is used.
 ";
 
 fn main() {
@@ -30,11 +32,13 @@ fn main() {
     let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
 
     match &args_ref[1..] {
-        ["build-and-check", "--target", target, args @ ..] if !args.is_empty() => {
-            run_build_and_check(Some(target), args);
+        ["build-and-check", target, "--", args @ ..] if !args.is_empty() => {
+            check_cargo_args(args);
+            run_build_and_check(target, args);
         }
-        ["build-and-check", args @ ..] if !args.is_empty() => {
-            run_build_and_check(None, args);
+        ["build-and-check", "--", args @ ..] if !args.is_empty() => {
+            check_cargo_args(args);
+            run_build_and_check(&host_target(), args);
         }
         _ => {
             println!("{USAGE}");
@@ -43,7 +47,18 @@ fn main() {
     }
 }
 
-fn run_build_and_check(target: Option<&str>, args: &[&str]) {
+/// Make sure `--target` isn't passed to avoid confusion (since it should be proivded only once,
+/// positionally).
+fn check_cargo_args(args: &[&str]) {
+    for arg in args {
+        assert!(
+            !arg.contains("--target"),
+            "target must be passed positionally. {USAGE}"
+        );
+    }
+}
+
+fn run_build_and_check(target: &str, args: &[&str]) {
     let paths = exec_cargo_with_args(target, args);
     for path in paths {
         println!("Checking {}", path.display());
@@ -70,13 +85,7 @@ fn host_target() -> String {
 
 /// Run `cargo build` with the provided additional arguments, collecting the list of created
 /// libraries.
-fn exec_cargo_with_args(target: Option<&str>, args: &[&str]) -> Vec<PathBuf> {
-    let mut host = String::new();
-    let target = target.unwrap_or_else(|| {
-        host = host_target();
-        host.as_str()
-    });
-
+fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec<PathBuf> {
     let mut cmd = Command::new("cargo");
     cmd.args(["build", "--target", target, "--message-format=json"])
         .args(args)

From 470e968464065adc63a26e1bf64132fb03375925 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 4 Jul 2025 18:10:27 -0500
Subject: [PATCH 086/133] symcheck: Improve diagnostics from spawned Cargo

Rather than printing the entire JSON dump, use the rendered version.
---
 crates/symbol-check/src/main.rs | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index f60d4f0d3..1312a7179 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -87,9 +87,14 @@ fn host_target() -> String {
 /// libraries.
 fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec<PathBuf> {
     let mut cmd = Command::new("cargo");
-    cmd.args(["build", "--target", target, "--message-format=json"])
-        .args(args)
-        .stdout(Stdio::piped());
+    cmd.args([
+        "build",
+        "--target",
+        target,
+        "--message-format=json-diagnostic-rendered-ansi",
+    ])
+    .args(args)
+    .stdout(Stdio::piped());
 
     println!("running: {cmd:?}");
     let mut child = cmd.spawn().expect("failed to launch Cargo");
@@ -100,11 +105,21 @@ fn exec_cargo_with_args(target: &str, args: &[&str]) -> Vec<PathBuf> {
 
     for line in reader.lines() {
         let line = line.expect("failed to read line");
-        println!("{line}"); // tee to stdout
-
-        // Select only steps that create files
         let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
-        if j["reason"] != "compiler-artifact" {
+        let reason = &j["reason"];
+
+        // Forward output that is meant to be user-facing
+        if reason == "compiler-message" {
+            println!("{}", j["message"]["rendered"].as_str().unwrap());
+        } else if reason == "build-finished" {
+            println!("build finshed. success: {}", j["success"]);
+        } else if reason == "build-script-executed" {
+            let pretty = serde_json::to_string_pretty(&j).unwrap();
+            println!("build script output: {pretty}",);
+        }
+
+        // Only interested in the artifact list now
+        if reason != "compiler-artifact" {
             continue;
         }
 

From df2e48eec60eb80012d8c62e2255d85c910be766 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 4 Jul 2025 19:30:31 -0500
Subject: [PATCH 087/133] Remove unused custom target JSON files

8521530f4938 ("Fix __divsi3 and __udivsi3 on thumbv6m targets") removed
tests that use these `thumb*-linux` target files in favor of tests that
use the `thumb*-none` targets, which are available via Rustup. The JSON
files haven't been used since then and are outdated, so remove them.
---
 thumbv6m-linux-eabi.json    | 28 ----------------------------
 thumbv7em-linux-eabi.json   | 27 ---------------------------
 thumbv7em-linux-eabihf.json | 28 ----------------------------
 thumbv7m-linux-eabi.json    | 27 ---------------------------
 4 files changed, 110 deletions(-)
 delete mode 100644 thumbv6m-linux-eabi.json
 delete mode 100644 thumbv7em-linux-eabi.json
 delete mode 100644 thumbv7em-linux-eabihf.json
 delete mode 100644 thumbv7m-linux-eabi.json

diff --git a/thumbv6m-linux-eabi.json b/thumbv6m-linux-eabi.json
deleted file mode 100644
index ac736eae6..000000000
--- a/thumbv6m-linux-eabi.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "features": "+strict-align",
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv6m-none-eabi",
-    "max-atomic-width": 0,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7em-linux-eabi.json b/thumbv7em-linux-eabi.json
deleted file mode 100644
index b6d4a6bda..000000000
--- a/thumbv7em-linux-eabi.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7em-none-eabi",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7em-linux-eabihf.json b/thumbv7em-linux-eabihf.json
deleted file mode 100644
index 81cfcd48d..000000000
--- a/thumbv7em-linux-eabihf.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "features": "+vfp4,+d16,+fp-only-sp",
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7em-none-eabihf",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}
diff --git a/thumbv7m-linux-eabi.json b/thumbv7m-linux-eabi.json
deleted file mode 100644
index abe037c5b..000000000
--- a/thumbv7m-linux-eabi.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "abi-blacklist": [
-        "stdcall",
-        "fastcall",
-        "vectorcall",
-        "win64",
-        "sysv64"
-    ],
-    "arch": "arm",
-    "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
-    "env": "",
-    "executables": true,
-    "linker": "arm-none-eabi-gcc",
-    "linker-flavor": "gcc",
-    "llvm-target": "thumbv7m-none-eabi",
-    "max-atomic-width": 32,
-    "os": "linux",
-    "panic-strategy": "abort",
-    "pre-link-args": {
-        "gcc": ["-nostartfiles"]
-    },
-    "relocation-model": "static",
-    "target-endian": "little",
-    "target-pointer-width": "32",
-    "target-c-int-width": "32",
-    "vendor": ""
-}

From 8aba4c899ee89eef7fe688cdfa6629ddd56908f9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 4 Jul 2025 19:42:18 -0500
Subject: [PATCH 088/133] Test building custom targets and resolve an issue
 probing `rustc`

The `rustc` probe done in our build scripts needs to pass `--target` to
get the correct configuration, which usually comes from the `TARGET`
environment variable. However, for targets specified via a `target.json`
file, `TARGET` gets set to the file name without an extension or path.
`rustc` will check a search path to attempt to locate the file, but this
is likely to fail since the directory where Cargo invokes build scripts
(and hence where those scripts invoke `rustc`) might not have any
relation to the JSON spec file.

Resolve this for now by leaving `f16` and `f128` disabled if the `rustc`
command fails. The result of the discussion at CARGO-14208 may
eventually provide a better solution.

A CI test is also added since custom JSON files are an edge case that
could fail in other ways. I verified this fails without the fix here.
The JSON file is the output for `thumbv7em-none-eabi`, just renamed so
`rustc` doesn't identify it.
---
 .github/workflows/main.yaml          | 20 ++++++++++++++++++++
 compiler-builtins/configure.rs       | 13 ++++++++++---
 etc/thumbv7em-none-eabi-renamed.json | 23 +++++++++++++++++++++++
 libm/configure.rs                    | 13 ++++++++++---
 4 files changed, 63 insertions(+), 6 deletions(-)
 create mode 100644 etc/thumbv7em-none-eabi-renamed.json

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 95b0962b0..541c99c82 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -195,6 +195,25 @@ jobs:
       run: ./ci/update-musl.sh
     - run: cargo clippy --workspace --all-targets
 
+  build-custom:
+    name: Build custom target
+    runs-on: ubuntu-24.04
+    timeout-minutes: 10
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: |
+        rustup update nightly --no-self-update
+        rustup default nightly
+        rustup component add rust-src
+    - uses: Swatinem/rust-cache@v2
+    - run: |
+        # Ensure we can build with custom target.json files (these can interact
+        # poorly with build scripts)
+        cargo build -p compiler_builtins -p libm \
+          --target etc/thumbv7em-none-eabi-renamed.json \
+          -Zbuild-std=core
+
   benchmarks:
     name: Benchmarks
     timeout-minutes: 20
@@ -331,6 +350,7 @@ jobs:
   success:
     needs:
       - benchmarks
+      - build-custom
       - clippy
       - extensive
       - miri
diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs
index a1e45080e..9721ddf09 100644
--- a/compiler-builtins/configure.rs
+++ b/compiler-builtins/configure.rs
@@ -45,9 +45,16 @@ impl Target {
         let out = cmd
             .output()
             .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
-        assert!(out.status.success(), "failed to run `{cmd:?}`");
         let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
 
+        // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe
+        // choice and leave `f16` and `f128` disabled.
+        let rustc_output_ok = out.status.success();
+        let reliable_f128 =
+            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128");
+        let reliable_f16 =
+            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16");
+
         Self {
             triple,
             triple_split,
@@ -67,8 +74,8 @@ impl Target {
                 .split(",")
                 .map(ToOwned::to_owned)
                 .collect(),
-            reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"),
-            reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"),
+            reliable_f128,
+            reliable_f16,
         }
     }
 
diff --git a/etc/thumbv7em-none-eabi-renamed.json b/etc/thumbv7em-none-eabi-renamed.json
new file mode 100644
index 000000000..81273d44e
--- /dev/null
+++ b/etc/thumbv7em-none-eabi-renamed.json
@@ -0,0 +1,23 @@
+{
+  "abi": "eabi",
+  "arch": "arm",
+  "c-enum-min-bits": 8,
+  "crt-objects-fallback": "false",
+  "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64",
+  "emit-debug-gdb-scripts": false,
+  "frame-pointer": "always",
+  "linker": "rust-lld",
+  "linker-flavor": "gnu-lld",
+  "llvm-floatabi": "soft",
+  "llvm-target": "thumbv7em-none-eabi",
+  "max-atomic-width": 32,
+  "metadata": {
+    "description": "Bare ARMv7E-M",
+    "host_tools": false,
+    "std": false,
+    "tier": 2
+  },
+  "panic-strategy": "abort",
+  "relocation-model": "static",
+  "target-pointer-width": "32"
+}
diff --git a/libm/configure.rs b/libm/configure.rs
index 6562ecbe5..f9100d2d5 100644
--- a/libm/configure.rs
+++ b/libm/configure.rs
@@ -43,9 +43,16 @@ impl Config {
         let out = cmd
             .output()
             .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
-        assert!(out.status.success(), "failed to run `{cmd:?}`");
         let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
 
+        // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe
+        // choice and leave `f16` and `f128` disabled.
+        let rustc_output_ok = out.status.success();
+        let reliable_f128 =
+            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128");
+        let reliable_f16 =
+            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16");
+
         Self {
             target_triple,
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
@@ -59,8 +66,8 @@ impl Config {
             target_string: env::var("TARGET").unwrap(),
             target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
             target_features,
-            reliable_f128: rustc_cfg.lines().any(|l| l == "target_has_reliable_f128"),
-            reliable_f16: rustc_cfg.lines().any(|l| l == "target_has_reliable_f16"),
+            reliable_f128,
+            reliable_f16,
         }
     }
 }

From 735e44f95fc1dbf3c2302c391fa6aed54ce58e7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Fri, 4 Jul 2025 22:15:27 +0200
Subject: [PATCH 089/133] Remove josh-sync crate

---
 Cargo.toml                   |   1 -
 crates/josh-sync/Cargo.toml  |   8 -
 crates/josh-sync/src/main.rs |  45 ----
 crates/josh-sync/src/sync.rs | 401 -----------------------------------
 4 files changed, 455 deletions(-)
 delete mode 100644 crates/josh-sync/Cargo.toml
 delete mode 100644 crates/josh-sync/src/main.rs
 delete mode 100644 crates/josh-sync/src/sync.rs

diff --git a/Cargo.toml b/Cargo.toml
index 41350c6cb..956d738f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,6 @@ resolver = "2"
 members = [
     "builtins-shim",
     "builtins-test",
-    "crates/josh-sync",
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
diff --git a/crates/josh-sync/Cargo.toml b/crates/josh-sync/Cargo.toml
deleted file mode 100644
index 8e2e891db..000000000
--- a/crates/josh-sync/Cargo.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[package]
-name = "josh-sync"
-edition = "2024"
-publish = false
-
-[dependencies]
-directories = "6.0.0"
-regex-lite = "0.1.6"
diff --git a/crates/josh-sync/src/main.rs b/crates/josh-sync/src/main.rs
deleted file mode 100644
index 7f0b11900..000000000
--- a/crates/josh-sync/src/main.rs
+++ /dev/null
@@ -1,45 +0,0 @@
-use std::io::{Read, Write};
-use std::process::exit;
-use std::{env, io};
-
-use crate::sync::{GitSync, Josh};
-
-mod sync;
-
-const USAGE: &str = r#"Utility for synchroniing compiler-builtins with rust-lang/rust
-
-Usage:
-
-    josh-sync rustc-pull
-
-        Pull from rust-lang/rust to compiler-builtins. Creates a commit
-        updating the version file, followed by a merge commit.
-
-    josh-sync rustc-push GITHUB_USERNAME [BRANCH]
-
-        Create a branch off of rust-lang/rust updating compiler-builtins.
-"#;
-
-fn main() {
-    let sync = GitSync::from_current_dir();
-
-    // Collect args, then recollect as str refs so we can match on them
-    let args: Vec<_> = env::args().collect();
-    let args: Vec<&str> = args.iter().map(String::as_str).collect();
-
-    match args.as_slice()[1..] {
-        ["rustc-pull"] => sync.rustc_pull(None),
-        ["rustc-push", github_user, branch] => sync.rustc_push(github_user, Some(branch)),
-        ["rustc-push", github_user] => sync.rustc_push(github_user, None),
-        ["start-josh"] => {
-            let _josh = Josh::start();
-            println!("press enter to stop");
-            io::stdout().flush().unwrap();
-            let _ = io::stdin().read(&mut [0u8]).unwrap();
-        }
-        _ => {
-            println!("{USAGE}");
-            exit(1);
-        }
-    }
-}
diff --git a/crates/josh-sync/src/sync.rs b/crates/josh-sync/src/sync.rs
deleted file mode 100644
index 2d89d2d1c..000000000
--- a/crates/josh-sync/src/sync.rs
+++ /dev/null
@@ -1,401 +0,0 @@
-use std::borrow::Cow;
-use std::net::{SocketAddr, TcpStream};
-use std::process::{Command, Stdio, exit};
-use std::time::Duration;
-use std::{env, fs, process, thread};
-
-use regex_lite::Regex;
-
-const JOSH_PORT: u16 = 42042;
-const DEFAULT_PR_BRANCH: &str = "update-builtins";
-
-pub struct GitSync {
-    upstream_repo: String,
-    upstream_ref: String,
-    upstream_url: String,
-    josh_filter: String,
-    josh_url_base: String,
-}
-
-/// This code was adapted from the miri repository, via the rustc-dev-guide
-/// (<https://github.com/rust-lang/rustc-dev-guide/tree/c51adbd12d/josh-sync>)
-impl GitSync {
-    pub fn from_current_dir() -> Self {
-        let upstream_repo =
-            env::var("UPSTREAM_ORG").unwrap_or_else(|_| "rust-lang".to_owned()) + "/rust";
-
-        Self {
-            upstream_url: format!("https://github.com/{upstream_repo}"),
-            upstream_repo,
-            upstream_ref: env::var("UPSTREAM_REF").unwrap_or_else(|_| "HEAD".to_owned()),
-            josh_filter: ":/library/compiler-builtins".to_owned(),
-            josh_url_base: format!("http://localhost:{JOSH_PORT}"),
-        }
-    }
-
-    /// Pull from rust-lang/rust to compiler-builtins.
-    pub fn rustc_pull(&self, commit: Option<String>) {
-        let Self {
-            upstream_ref,
-            upstream_url,
-            upstream_repo,
-            ..
-        } = self;
-
-        let new_upstream_base = commit.unwrap_or_else(|| {
-            let out = check_output(["git", "ls-remote", upstream_url, upstream_ref]);
-            out.split_whitespace()
-                .next()
-                .unwrap_or_else(|| panic!("could not split output: '{out}'"))
-                .to_owned()
-        });
-
-        ensure_clean();
-
-        // Make sure josh is running.
-        let _josh = Josh::start();
-        let josh_url_filtered = self.josh_url(
-            &self.upstream_repo,
-            Some(&new_upstream_base),
-            Some(&self.josh_filter),
-        );
-
-        let previous_upstream_base = fs::read_to_string("rust-version")
-            .expect("failed to read `rust-version`")
-            .trim()
-            .to_string();
-        assert_ne!(previous_upstream_base, new_upstream_base, "nothing to pull");
-
-        let orig_head = check_output(["git", "rev-parse", "HEAD"]);
-        println!("original upstream base: {previous_upstream_base}");
-        println!("new upstream base: {new_upstream_base}");
-        println!("original HEAD: {orig_head}");
-
-        // Fetch the latest upstream HEAD so we can get a summary. Use the Josh URL for caching.
-        run([
-            "git",
-            "fetch",
-            &self.josh_url(&self.upstream_repo, Some(&new_upstream_base), Some(":/")),
-            &new_upstream_base,
-            "--depth=1",
-        ]);
-        let new_summary = check_output(["git", "log", "-1", "--format=%h %s", &new_upstream_base]);
-        let new_summary = replace_references(&new_summary, &self.upstream_repo);
-
-        // Update rust-version file. As a separate commit, since making it part of
-        // the merge has confused the heck out of josh in the past.
-        // We pass `--no-verify` to avoid running git hooks.
-        // We do this before the merge so that if there are merge conflicts, we have
-        // the right rust-version file while resolving them.
-        fs::write("rust-version", format!("{new_upstream_base}\n"))
-            .expect("failed to write rust-version");
-
-        let prep_message = format!(
-            "Update the upstream Rust version\n\n\
-            To prepare for merging from {upstream_repo}, set the version file to:\n\n    \
-            {new_summary}\n\
-            ",
-        );
-        run([
-            "git",
-            "commit",
-            "rust-version",
-            "--no-verify",
-            "-m",
-            &prep_message,
-        ]);
-
-        // Fetch given rustc commit.
-        run(["git", "fetch", &josh_url_filtered]);
-        let incoming_ref = check_output(["git", "rev-parse", "FETCH_HEAD"]);
-        println!("incoming ref: {incoming_ref}");
-
-        let merge_message = format!(
-            "Merge ref '{upstream_head_short}{filter}' from {upstream_url}\n\n\
-            Pull recent changes from {upstream_repo} via Josh.\n\n\
-            Upstream ref: {new_upstream_base}\n\
-            Filtered ref: {incoming_ref}\n\
-            ",
-            upstream_head_short = &new_upstream_base[..12],
-            filter = self.josh_filter
-        );
-
-        // This should not add any new root commits. So count those before and after merging.
-        let num_roots = || -> u32 {
-            let out = check_output(["git", "rev-list", "HEAD", "--max-parents=0", "--count"]);
-            out.trim()
-                .parse::<u32>()
-                .unwrap_or_else(|e| panic!("failed to parse `{out}`: {e}"))
-        };
-        let num_roots_before = num_roots();
-
-        let pre_merge_sha = check_output(["git", "rev-parse", "HEAD"]);
-        println!("pre-merge HEAD: {pre_merge_sha}");
-
-        // Merge the fetched commit.
-        run([
-            "git",
-            "merge",
-            "FETCH_HEAD",
-            "--no-verify",
-            "--no-ff",
-            "-m",
-            &merge_message,
-        ]);
-
-        let current_sha = check_output(["git", "rev-parse", "HEAD"]);
-        if current_sha == pre_merge_sha {
-            run(["git", "reset", "--hard", &orig_head]);
-            eprintln!(
-                "No merge was performed, no changes to pull were found. \
-                Rolled back the preparation commit."
-            );
-            exit(1);
-        }
-
-        // Check that the number of roots did not increase.
-        assert_eq!(
-            num_roots(),
-            num_roots_before,
-            "Josh created a new root commit. This is probably not the history you want."
-        );
-    }
-
-    /// Construct an update to rust-lang/rust from compiler-builtins.
-    pub fn rustc_push(&self, github_user: &str, branch: Option<&str>) {
-        let Self {
-            josh_filter,
-            upstream_url,
-            ..
-        } = self;
-
-        let branch = branch.unwrap_or(DEFAULT_PR_BRANCH);
-        let josh_url = self.josh_url(&format!("{github_user}/rust"), None, Some(josh_filter));
-        let user_upstream_url = format!("git@github.com:{github_user}/rust.git");
-
-        let Ok(rustc_git) = env::var("RUSTC_GIT") else {
-            panic!("the RUSTC_GIT environment variable must be set to a rust-lang/rust checkout")
-        };
-
-        ensure_clean();
-        let base = fs::read_to_string("rust-version")
-            .expect("failed to read `rust-version`")
-            .trim()
-            .to_string();
-
-        // Make sure josh is running.
-        let _josh = Josh::start();
-
-        // Prepare the branch. Pushing works much better if we use as base exactly
-        // the commit that we pulled from last time, so we use the `rust-version`
-        // file to find out which commit that would be.
-        println!("Preparing {github_user}/rust (base: {base})...");
-
-        if Command::new("git")
-            .args(["-C", &rustc_git, "fetch", &user_upstream_url, branch])
-            .output() // capture output
-            .expect("could not run fetch")
-            .status
-            .success()
-        {
-            panic!(
-                "The branch '{branch}' seems to already exist in '{user_upstream_url}'. \
-                 Please delete it and try again."
-            );
-        }
-
-        run(["git", "-C", &rustc_git, "fetch", upstream_url, &base]);
-
-        run_cfg("git", |c| {
-            c.args([
-                "-C",
-                &rustc_git,
-                "push",
-                &user_upstream_url,
-                &format!("{base}:refs/heads/{branch}"),
-            ])
-            .stdout(Stdio::null())
-            .stderr(Stdio::null()) // silence the "create GitHub PR" message
-        });
-        println!("pushed PR branch");
-
-        // Do the actual push.
-        println!("Pushing changes...");
-        run(["git", "push", &josh_url, &format!("HEAD:{branch}")]);
-        println!();
-
-        // Do a round-trip check to make sure the push worked as expected.
-        run(["git", "fetch", &josh_url, branch]);
-
-        let head = check_output(["git", "rev-parse", "HEAD"]);
-        let fetch_head = check_output(["git", "rev-parse", "FETCH_HEAD"]);
-        assert_eq!(
-            head, fetch_head,
-            "Josh created a non-roundtrip push! Do NOT merge this into rustc!\n\
-             Expected {head}, got {fetch_head}."
-        );
-        println!(
-            "Confirmed that the push round-trips back to compiler-builtins properly. Please \
-            create a rustc PR:"
-        );
-        // Open PR with `subtree update` title to silence the `no-merges` triagebot check
-        println!(
-            "    {upstream_url}/compare/{github_user}:{branch}?quick_pull=1\
-            &title=Update%20the%20%60compiler-builtins%60%20subtree\
-            &body=Update%20the%20Josh%20subtree%20to%20https%3A%2F%2Fgithub.com%2Frust-lang%2F\
-            compiler-builtins%2Fcommit%2F{head_short}.%0A%0Ar%3F%20%40ghost",
-            head_short = &head[..12],
-        );
-    }
-
-    /// Construct a url to the local Josh server with (optionally)
-    fn josh_url(&self, repo: &str, rev: Option<&str>, filter: Option<&str>) -> String {
-        format!(
-            "{base}/{repo}.git{at}{rev}{filter}{filt_git}",
-            base = self.josh_url_base,
-            at = if rev.is_some() { "@" } else { "" },
-            rev = rev.unwrap_or_default(),
-            filter = filter.unwrap_or_default(),
-            filt_git = if filter.is_some() { ".git" } else { "" }
-        )
-    }
-}
-
-/// Fail if there are files that need to be checked in.
-fn ensure_clean() {
-    let read = check_output(["git", "status", "--untracked-files=no", "--porcelain"]);
-    assert!(
-        read.is_empty(),
-        "working directory must be clean before performing rustc pull"
-    );
-}
-
-/* Helpers for running commands with logged invocations */
-
-/// Run a command from an array, passing its output through.
-fn run<'a, Args: AsRef<[&'a str]>>(l: Args) {
-    let l = l.as_ref();
-    run_cfg(l[0], |c| c.args(&l[1..]));
-}
-
-/// Run a command from an array, collecting its output.
-fn check_output<'a, Args: AsRef<[&'a str]>>(l: Args) -> String {
-    let l = l.as_ref();
-    check_output_cfg(l[0], |c| c.args(&l[1..]))
-}
-
-/// [`run`] with configuration.
-fn run_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) {
-    // self.read(l.as_ref());
-    check_output_cfg(prog, |c| f(c.stdout(Stdio::inherit())));
-}
-
-/// [`read`] with configuration. All shell helpers print the command and pass stderr.
-fn check_output_cfg(prog: &str, f: impl FnOnce(&mut Command) -> &mut Command) -> String {
-    let mut cmd = Command::new(prog);
-    cmd.stderr(Stdio::inherit());
-    f(&mut cmd);
-    eprintln!("+ {cmd:?}");
-    let out = cmd.output().expect("command failed");
-    assert!(out.status.success());
-    String::from_utf8(out.stdout.trim_ascii().to_vec()).expect("non-UTF8 output")
-}
-
-/// Replace `#1234`-style issue/PR references with `repo#1234` to ensure links work across
-/// repositories.
-fn replace_references<'a>(s: &'a str, repo: &str) -> Cow<'a, str> {
-    let re = Regex::new(r"\B(?P<id>#\d+)\b").unwrap();
-    re.replace(s, &format!("{repo}$id"))
-}
-
-/// Create a wrapper that stops Josh on drop.
-pub struct Josh(process::Child);
-
-impl Josh {
-    pub fn start() -> Self {
-        // Determine cache directory.
-        let user_dirs =
-            directories::ProjectDirs::from("org", "rust-lang", "rustc-compiler-builtins-josh")
-                .unwrap();
-        let local_dir = user_dirs.cache_dir().to_owned();
-
-        // Start josh, silencing its output.
-        #[expect(clippy::zombie_processes, reason = "clippy can't handle the loop")]
-        let josh = process::Command::new("josh-proxy")
-            .arg("--local")
-            .arg(local_dir)
-            .args([
-                "--remote=https://github.com",
-                &format!("--port={JOSH_PORT}"),
-                "--no-background",
-            ])
-            .stdout(Stdio::null())
-            .stderr(Stdio::null())
-            .spawn()
-            .expect("failed to start josh-proxy, make sure it is installed");
-
-        // Wait until the port is open. We try every 10ms until 1s passed.
-        for _ in 0..100 {
-            // This will generally fail immediately when the port is still closed.
-            let addr = SocketAddr::from(([127, 0, 0, 1], JOSH_PORT));
-            let josh_ready = TcpStream::connect_timeout(&addr, Duration::from_millis(1));
-
-            if josh_ready.is_ok() {
-                println!("josh up and running");
-                return Josh(josh);
-            }
-
-            // Not ready yet.
-            thread::sleep(Duration::from_millis(10));
-        }
-        panic!("Even after waiting for 1s, josh-proxy is still not available.")
-    }
-}
-
-impl Drop for Josh {
-    fn drop(&mut self) {
-        if cfg!(unix) {
-            // Try to gracefully shut it down.
-            Command::new("kill")
-                .args(["-s", "INT", &self.0.id().to_string()])
-                .output()
-                .expect("failed to SIGINT josh-proxy");
-            // Sadly there is no "wait with timeout"... so we just give it some time to finish.
-            thread::sleep(Duration::from_millis(100));
-            // Now hopefully it is gone.
-            if self
-                .0
-                .try_wait()
-                .expect("failed to wait for josh-proxy")
-                .is_some()
-            {
-                return;
-            }
-        }
-        // If that didn't work (or we're not on Unix), kill it hard.
-        eprintln!(
-            "I have to kill josh-proxy the hard way, let's hope this does not \
-            break anything."
-        );
-        self.0.kill().expect("failed to SIGKILL josh-proxy");
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_replace() {
-        assert_eq!(replace_references("#1234", "r-l/rust"), "r-l/rust#1234");
-        assert_eq!(replace_references("#1234x", "r-l/rust"), "#1234x");
-        assert_eq!(
-            replace_references("merge #1234", "r-l/rust"),
-            "merge r-l/rust#1234"
-        );
-        assert_eq!(
-            replace_references("foo/bar#1234", "r-l/rust"),
-            "foo/bar#1234"
-        );
-    }
-}

From 6e9d1cfbe9db5a1ffe1eb7102ec6dc6219de6105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Fri, 4 Jul 2025 22:15:56 +0200
Subject: [PATCH 090/133] Add josh-sync config file

---
 josh-sync.toml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 josh-sync.toml

diff --git a/josh-sync.toml b/josh-sync.toml
new file mode 100644
index 000000000..599a12af8
--- /dev/null
+++ b/josh-sync.toml
@@ -0,0 +1,3 @@
+org = "rust-lang"
+repo = "compiler-builtins"
+path = "library/compiler-builtins"

From e350b9a406797b36a04099bc7b1cf7a2b7a5729c Mon Sep 17 00:00:00 2001
From: Josh Stone <jistone@redhat.com>
Date: Tue, 8 Jul 2025 16:38:35 -0700
Subject: [PATCH 091/133] Disable docs for `compiler-builtins` and `sysroot`

Bootstrap already had a manual doc filter for the `sysroot` crate, but
other library crates keep themselves out of the public docs by setting
`[lib] doc = false` in their manifest. This seems like a better solution
to hide `compiler-builtins` docs, and removes the `sysroot` hack too.
---
 compiler-builtins/Cargo.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index c5446cd76..3ccb05f73 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -19,6 +19,8 @@ links = "compiler-rt"
 bench = false
 doctest = false
 test = false
+# make sure this crate isn't included in public standard library docs
+doc = false
 
 [dependencies]
 core = { path = "../../core", optional = true }

From 0f2c11540ac3bf1debb3afc68ea56b81f7c5f45d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Fri, 4 Jul 2025 22:17:22 +0200
Subject: [PATCH 092/133] Add documentation about subtree sync

---
 CONTRIBUTING.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9f67cfc31..9ae4f893c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -165,3 +165,12 @@ cargo bench --no-default-features \
 
 [`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
 [Valgrind]: https://valgrind.org/
+
+## Subtree synchronization
+
+`compiler-builtins` is included as a [Josh subtree] in the main compiler
+repository (`rust-lang/rust`). You can find a guide on how to create synchronization
+(pull and push) PRs at the [`rustc-dev-guide` page].
+
+[Josh subtree]: https://rustc-dev-guide.rust-lang.org/external-repos.html#josh-subtrees
+[`rustc-dev-guide` page]: https://rustc-dev-guide.rust-lang.org/external-repos.html#synchronizing-a-josh-subtree

From 90bd9f53aefd4fc40130993c395983bc1d9fb44e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 10 Jul 2025 17:50:49 -0400
Subject: [PATCH 093/133] Upgrade dependencies to the latest version

This picks up a fix in `rustc_apfloat` [1] that resolves a problem with
`fma`.

[1]: https://github.com/rust-lang/rustc_apfloat/releases/tag/rustc_apfloat-v0.2.3%2Bllvm-462a31f5a5ab
---
 builtins-test/Cargo.toml        | 2 +-
 crates/libm-macros/Cargo.toml   | 2 +-
 crates/musl-math-sys/Cargo.toml | 2 +-
 libm-test/Cargo.toml            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index 093d4633f..4607342cd 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -12,7 +12,7 @@ license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 # `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
 rand_xoshiro = "0.7"
 # To compare float builtins against
-rustc_apfloat = "0.2.2"
+rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
 iai-callgrind = { version = "0.14.1", optional = true }
 
diff --git a/crates/libm-macros/Cargo.toml b/crates/libm-macros/Cargo.toml
index 6bbf47784..100a8d0ec 100644
--- a/crates/libm-macros/Cargo.toml
+++ b/crates/libm-macros/Cargo.toml
@@ -12,7 +12,7 @@ proc-macro = true
 heck = "0.5.0"
 proc-macro2 = "1.0.95"
 quote = "1.0.40"
-syn = { version = "2.0.101", features = ["full", "extra-traits", "visit-mut"] }
+syn = { version = "2.0.104", features = ["full", "extra-traits", "visit-mut"] }
 
 [lints.rust]
 # Values used during testing
diff --git a/crates/musl-math-sys/Cargo.toml b/crates/musl-math-sys/Cargo.toml
index 3b8811734..39f6fa906 100644
--- a/crates/musl-math-sys/Cargo.toml
+++ b/crates/musl-math-sys/Cargo.toml
@@ -11,4 +11,4 @@ license = "MIT OR Apache-2.0"
 libm = { path = "../../libm" }
 
 [build-dependencies]
-cc = "1.2.25"
+cc = "1.2.29"
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index 05fcc3234..e577288c9 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -32,7 +32,7 @@ anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
 iai-callgrind = { version = "0.14.1", optional = true }
-indicatif = { version = "0.17.11", default-features = false }
+indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
 musl-math-sys = { path = "../crates/musl-math-sys", optional = true }

From 7bba268efa238a171dcc4ee5c1dc6f28310670f2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 10 Jul 2025 17:55:08 -0400
Subject: [PATCH 094/133] Upgrade `iai-callgrind` to 0.15

Pick up the latest version of iai-callgrind, which includes some output
improvements.

Changelog: https://github.com/iai-callgrind/iai-callgrind/releases
---
 builtins-test/Cargo.toml | 2 +-
 ci/bench-icount.sh       | 2 +-
 libm-test/Cargo.toml     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index 4607342cd..00a9d8579 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -14,7 +14,7 @@ rand_xoshiro = "0.7"
 # To compare float builtins against
 rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.14.1", optional = true }
+iai-callgrind = { version = "0.15.2", optional = true }
 
 [dependencies.compiler_builtins]
 path = "../builtins-shim"
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index d2baebb52..12228b9da 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -28,7 +28,7 @@ function run_icount_benchmarks() {
 
     iai_args=(
         "--home" "$(pwd)/$iai_home"
-        "--regression=ir=5.0"
+        "--callgrind-limits=ir=5.0"
         "--save-summary"
     )
 
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index e577288c9..0af6b0c1d 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -31,7 +31,7 @@ short-benchmarks = []
 anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
-iai-callgrind = { version = "0.14.1", optional = true }
+iai-callgrind = { version = "0.15.2", optional = true }
 indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }

From 6af9880f89ba621659842ea5cc6722eb53837af3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Sat, 12 Jul 2025 22:30:19 +0200
Subject: [PATCH 095/133] Add CI workflow for automatically performing subtree
 sync pulls

This CI workflow will run the https://github.com/rust-lang/josh-sync
tool on Mondays and Thursdays. It will try to do a pull (sync
compiler-builtins changes from rust-lang/rust into this repository).
When it runs, three things can happen:
- There are no rustc changes to be pulled, the bot does nothing.
- There are some new changes to be pulled. In that case, the bot will
either open or update an existing PR titled "Rustc pull update" on this
repository with the changes. After the PR is merged, we should ideally
do the opposite sync (push) manually.
- The pull fails (usually because of a merge conflict), or the bot
determines that a pull PR has been opened for more than a week without
being merged. In that case, it will post a ping to
https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375.
---
 .github/workflows/rustc-pull.yml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 .github/workflows/rustc-pull.yml

diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml
new file mode 100644
index 000000000..ba698492e
--- /dev/null
+++ b/.github/workflows/rustc-pull.yml
@@ -0,0 +1,23 @@
+# Perform a subtree sync (pull) using the josh-sync tool once every few days (or on demand).
+name: rustc-pull
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Run at 04:00 UTC every Monday and Thursday
+    - cron: '0 4 * * 1,4'
+
+jobs:
+  pull:
+    if: github.repository == 'rust-lang/compiler-builtins'
+    uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
+    with:
+      # https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375
+      zulip-stream-id: 219381
+      zulip-topic: 'compiler-builtins subtree sync automation'
+      zulip-bot-email:  "compiler-builtins-ci-bot@rust-lang.zulipchat.com"
+      pr-base-branch: master
+      branch-name: rustc-pull
+    secrets:
+      zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
+      token: ${{ secrets.GITHUB_TOKEN }}

From 599f0e6408bef29a69f986afc71fa76e732ddd60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Sat, 12 Jul 2025 22:41:45 +0200
Subject: [PATCH 096/133] Tell triagebot to reopen bot PRs to run CI on them

---
 triagebot.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/triagebot.toml b/triagebot.toml
index ecc05da01..715be27fc 100644
--- a/triagebot.toml
+++ b/triagebot.toml
@@ -19,3 +19,6 @@ check-commits = false
 # Enable issue transfers within the org
 # Documentation at: https://forge.rust-lang.org/triagebot/transfer.html
 [transfer]
+
+# Automatically close and reopen PRs made by bots to run CI on them
+[bot-pull-requests]

From e8cfc9493c167390a267aea27aadc134692879a9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 17 Jul 2025 03:58:43 -0500
Subject: [PATCH 097/133] Allow a new lint failure in nightly

```text
warning: function `f32_to_bits` is never used
   --> libm/src/math/support/float_traits.rs:367:14
    |
367 | pub const fn f32_to_bits(x: f32) -> u32 {
    |              ^^^^^^^^^^^
    |
    = note: `#[warn(dead_code)]` on by default

warning: function `f64_to_bits` is never used
   --> libm/src/math/support/float_traits.rs:381:14
    |
381 | pub const fn f64_to_bits(x: f64) -> u64 {
    |              ^^^^^^^^^^^

warning: `libm` (lib) generated 2 warnings
```

This is a false positive, see RUST-144060.
---
 libm/src/math/support/float_traits.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libm/src/math/support/float_traits.rs b/libm/src/math/support/float_traits.rs
index c3e7eeec2..fb790e696 100644
--- a/libm/src/math/support/float_traits.rs
+++ b/libm/src/math/support/float_traits.rs
@@ -363,6 +363,7 @@ pub const fn f32_from_bits(bits: u32) -> f32 {
 }
 
 /// `f32::to_bits`
+#[allow(dead_code)] // workaround for false positive RUST-144060
 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f32_to_bits(x: f32) -> u32 {
     // SAFETY: POD cast with no preconditions
@@ -377,6 +378,7 @@ pub const fn f64_from_bits(bits: u64) -> f64 {
 }
 
 /// `f64::to_bits`
+#[allow(dead_code)] // workaround for false positive RUST-144060
 #[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
 pub const fn f64_to_bits(x: f64) -> u64 {
     // SAFETY: POD cast with no preconditions

From 59b329a79d8a1e667597c59535a27a5836ba86d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Thu, 17 Jul 2025 12:30:52 +0200
Subject: [PATCH 098/133] Update the `no-merges` PR title

Match the new CI-created PRs:
https://github.com/rust-lang/compiler-builtins/pull/974.
---
 triagebot.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/triagebot.toml b/triagebot.toml
index 715be27fc..8a2356c2b 100644
--- a/triagebot.toml
+++ b/triagebot.toml
@@ -4,7 +4,7 @@
 # Warns when a PR contains merge commits
 # Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html
 [no-merges]
-exclude_titles = ["Update from"]
+exclude_titles = ["Rustc pull update"]
 
 # Canonicalize issue numbers to avoid closing the wrong issue
 # when commits are included in subtrees, as well as warning links in commits.

From 5d33f9d9f3e985fd1748386845a30ef4a7d4fc55 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Jul 2025 17:35:57 +0000
Subject: [PATCH 099/133] mem: Use `core::ffi::c_int`

This alias was added in 9897bfb8a ("Fix memset arguments for MSP430
target"), which predates `core::ffi`. Now that it exists we can just use
`core::ffi::c_int`.
---
 compiler-builtins/src/mem/mod.rs | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs
index 6828f3804..a6f533cb7 100644
--- a/compiler-builtins/src/mem/mod.rs
+++ b/compiler-builtins/src/mem/mod.rs
@@ -3,13 +3,6 @@
 // FIXME(e2024): this eventually needs to be removed.
 #![allow(unsafe_op_in_unsafe_fn)]
 
-#[allow(warnings)]
-#[cfg(target_pointer_width = "16")]
-type c_int = i16;
-#[allow(warnings)]
-#[cfg(not(target_pointer_width = "16"))]
-type c_int = i32;
-
 // memcpy/memmove/memset have optimized implementations on some architectures
 #[cfg_attr(
     all(not(feature = "no-asm"), target_arch = "x86_64"),
@@ -38,7 +31,7 @@ intrinsics! {
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 {
+    pub unsafe extern "C" fn memset(s: *mut u8, c: core::ffi::c_int, n: usize) -> *mut u8 {
         impls::set_bytes(s, c as u8, n);
         s
     }

From 556be9bfc9f2bea7d462388143039737184f00d0 Mon Sep 17 00:00:00 2001
From: Julien THILLARD <54775010+supersurviveur@users.noreply.github.com>
Date: Fri, 18 Jul 2025 20:19:13 +0200
Subject: [PATCH 100/133] Change the `memcmp` and `bcmp` return type to `c_int`

Fix the return type of `memcmp` and `bcmp` builtin functions on targets
with a `c_int` other than `i32`.

Linked issue: https://github.com/rust-lang/rust/issues/144076
---
 compiler-builtins/src/mem/impls.rs | 5 +++--
 compiler-builtins/src/mem/mod.rs   | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/compiler-builtins/src/mem/impls.rs b/compiler-builtins/src/mem/impls.rs
index 14a478748..da16dee25 100644
--- a/compiler-builtins/src/mem/impls.rs
+++ b/compiler-builtins/src/mem/impls.rs
@@ -15,6 +15,7 @@
 // this use. Of course this is not a guarantee that such use will work, it just means that this
 // crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if
 // something does go wrong at runtime.
+use core::ffi::c_int;
 use core::intrinsics::likely;
 
 const WORD_SIZE: usize = core::mem::size_of::<usize>();
@@ -384,13 +385,13 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
 }
 
 #[inline(always)]
-pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> c_int {
     let mut i = 0;
     while i < n {
         let a = *s1.wrapping_add(i);
         let b = *s2.wrapping_add(i);
         if a != b {
-            return a as i32 - b as i32;
+            return c_int::from(a) - c_int::from(b);
         }
         i += 1;
     }
diff --git a/compiler-builtins/src/mem/mod.rs b/compiler-builtins/src/mem/mod.rs
index a6f533cb7..a227f60a2 100644
--- a/compiler-builtins/src/mem/mod.rs
+++ b/compiler-builtins/src/mem/mod.rs
@@ -37,12 +37,12 @@ intrinsics! {
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+    pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int {
         impls::compare_bytes(s1, s2, n)
     }
 
     #[mem_builtin]
-    pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+    pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> core::ffi::c_int {
         memcmp(s1, s2, n)
     }
 

From 794c772e31c6b4b3fcd79a7d5e103ad24787c062 Mon Sep 17 00:00:00 2001
From: The rustc-josh-sync Cronjob Bot <github-actions@github.com>
Date: Fri, 18 Jul 2025 19:04:50 +0000
Subject: [PATCH 101/133] Prepare for merging from rust-lang/rust

This updates the rust-version file to 82310651b93a594a3fd69015e1562186a080d94c.
---
 rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-version b/rust-version
index 731839835..a4db05a87 100644
--- a/rust-version
+++ b/rust-version
@@ -1 +1 @@
-d087f112b7d1323446c7b39a8b616aee7fa56b3d
+82310651b93a594a3fd69015e1562186a080d94c

From 6aed0ee92adbdea0d99b7539b31efd3f4d6bc4e9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 21 Jul 2025 12:18:07 -0500
Subject: [PATCH 102/133] ci: Switch to nightly rustfmt

We are getting warnings in CI about unsupported features. There isn't
any reason to use stable rustfmt so switch the channel here.
---
 .github/workflows/main.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 541c99c82..972f1b898 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -311,8 +311,8 @@ jobs:
     timeout-minutes: 10
     steps:
     - uses: actions/checkout@v4
-    - name: Install stable `rustfmt`
-      run: rustup set profile minimal && rustup default stable && rustup component add rustfmt
+    - name: Install nightly `rustfmt`
+      run: rustup set profile minimal && rustup default nightly && rustup component add rustfmt
     - run: cargo fmt -- --check
 
   extensive:

From 0822c2615343f4b5fb7b46fc7231bf2d3aa6c37b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 23 Jul 2025 04:50:41 -0500
Subject: [PATCH 103/133] ci: Add native PowerPC64LE and s390x jobs

We now have access to native runners, so make use of them for these
architectures. The existing ppc64le Docker job is kept for now.
---
 .github/workflows/main.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 972f1b898..6c98a60d2 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -70,8 +70,12 @@ jobs:
           os: ubuntu-24.04
         - target: powerpc64le-unknown-linux-gnu
           os: ubuntu-24.04
+        - target: powerpc64le-unknown-linux-gnu
+          os: ubuntu-24.04-ppc64le
         - target: riscv64gc-unknown-linux-gnu
           os: ubuntu-24.04
+        - target: s390x-unknown-linux-gnu
+          os: ubuntu-24.04-s390x
         - target: thumbv6m-none-eabi
           os: ubuntu-24.04
         - target: thumbv7em-none-eabi
@@ -105,8 +109,21 @@ jobs:
       TEST_VERBATIM: ${{ matrix.test_verbatim }}
       MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
     steps:
+    - name: Print $HOME
+      shell: bash
+      run: |
+        set -x
+        echo "${HOME:-not found}"
+        pwd
+        printenv
     - name: Print runner information
       run: uname -a
+
+    # Native ppc and s390x runners don't have rustup by default
+    - name: Install rustup
+      if: matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x'
+      run: sudo apt-get update && sudo apt-get install -y rustup
+
     - uses: actions/checkout@v4
     - name: Install Rust (rustup)
       shell: bash
@@ -117,7 +134,12 @@ jobs:
         rustup update "$channel" --no-self-update
         rustup default "$channel"
         rustup target add "${{ matrix.target }}"
+
+    # Our scripts use nextest if possible. This is skipped on the native ppc
+    # and s390x runners since install-action doesn't support them.
     - uses: taiki-e/install-action@nextest
+      if: "!(matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x')"
+
     - uses: Swatinem/rust-cache@v2
       with:
         key: ${{ matrix.target }}

From 61f16d0da29e362f9679bca31b1ad06781b3442c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 01:20:55 -0500
Subject: [PATCH 104/133] ci: Update to the latest ubuntu:25.04 Docker images

This includes a qemu update from 8.2.2 to 9.2.1 which should hopefully
fix some bugs we have encountered.

PowerPC64LE is skipped for now because the new version seems to cause a
number of new SIGILLs.
---
 ci/docker/aarch64-unknown-linux-gnu/Dockerfile       | 2 +-
 ci/docker/arm-unknown-linux-gnueabi/Dockerfile       | 2 +-
 ci/docker/arm-unknown-linux-gnueabihf/Dockerfile     | 2 +-
 ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile   | 2 +-
 ci/docker/i586-unknown-linux-gnu/Dockerfile          | 2 +-
 ci/docker/i686-unknown-linux-gnu/Dockerfile          | 2 +-
 ci/docker/loongarch64-unknown-linux-gnu/Dockerfile   | 2 +-
 ci/docker/mips-unknown-linux-gnu/Dockerfile          | 2 +-
 ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile   | 2 +-
 ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile | 2 +-
 ci/docker/mipsel-unknown-linux-gnu/Dockerfile        | 2 +-
 ci/docker/powerpc-unknown-linux-gnu/Dockerfile       | 2 +-
 ci/docker/powerpc64-unknown-linux-gnu/Dockerfile     | 2 +-
 ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile   | 1 +
 ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile     | 2 +-
 ci/docker/thumbv6m-none-eabi/Dockerfile              | 2 +-
 ci/docker/thumbv7em-none-eabi/Dockerfile             | 2 +-
 ci/docker/thumbv7em-none-eabihf/Dockerfile           | 2 +-
 ci/docker/thumbv7m-none-eabi/Dockerfile              | 2 +-
 ci/docker/x86_64-unknown-linux-gnu/Dockerfile        | 2 +-
 ci/run-docker.sh                                     | 2 +-
 21 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index df71804ba..69b99f5b6 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
index 38ad1a136..2fa6f8520 100644
--- a/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
+++ b/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
index ffead05d5..85f7335f5 100644
--- a/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index 9ab49e46e..42511479f 100644
--- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
index d12ced325..35488c477 100644
--- a/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/i686-unknown-linux-gnu/Dockerfile b/ci/docker/i686-unknown-linux-gnu/Dockerfile
index d12ced325..35488c477 100644
--- a/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
index 62b43da9e..e95a1b916 100644
--- a/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips-unknown-linux-gnu/Dockerfile b/ci/docker/mips-unknown-linux-gnu/Dockerfile
index c02a94672..fd1877603 100644
--- a/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/mips-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
index 6d8b96069..4e542ce68 100644
--- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
index 7e6ac7c3b..528dfd894 100644
--- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
index 9feadc7b5..257218023 100644
--- a/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
index 84dcaf47e..cac1f2361 100644
--- a/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
index b90fd5ec5..76127b7db 100644
--- a/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
index e6d1d1cd0..c95adecf0 100644
--- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@@ -1,3 +1,4 @@
+# FIXME(ppc): We want 25.04 but get SIGILLs
 ARG IMAGE=ubuntu:24.04
 FROM $IMAGE
 
diff --git a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
index eeb4ed019..513efacd6 100644
--- a/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv6m-none-eabi/Dockerfile b/ci/docker/thumbv6m-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv6m-none-eabi/Dockerfile
+++ b/ci/docker/thumbv6m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7em-none-eabi/Dockerfile b/ci/docker/thumbv7em-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7em-none-eabi/Dockerfile
+++ b/ci/docker/thumbv7em-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7em-none-eabihf/Dockerfile b/ci/docker/thumbv7em-none-eabihf/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7em-none-eabihf/Dockerfile
+++ b/ci/docker/thumbv7em-none-eabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/thumbv7m-none-eabi/Dockerfile b/ci/docker/thumbv7m-none-eabi/Dockerfile
index ad0d4351e..a9a172a21 100644
--- a/ci/docker/thumbv7m-none-eabi/Dockerfile
+++ b/ci/docker/thumbv7m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index c590adcdd..2ef800129 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index d0122dee5..4c1fe0fe2 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -97,7 +97,7 @@ if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then
     usage: ./ci/run-docker.sh [target]
 
     you can also set DOCKER_BASE_IMAGE to use something other than the default
-    ubuntu:24.04 (or rustlang/rust:nightly).
+    ubuntu:25.04 (or rustlang/rust:nightly).
     "
     exit
 fi

From b185e89bbd2252df37b9abca1df50ba8b0b26a2e Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 03:26:32 -0500
Subject: [PATCH 105/133] symcheck: Switch the `object` dependency from git to
 crates.io

Wasm support has since been released, so we no longer need to depend on
a git version of `object`.
---
 crates/symbol-check/Cargo.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml
index 30969ee40..e2218b491 100644
--- a/crates/symbol-check/Cargo.toml
+++ b/crates/symbol-check/Cargo.toml
@@ -5,8 +5,7 @@ edition = "2024"
 publish = false
 
 [dependencies]
-# FIXME: used as a git dependency since the latest release does not support wasm
-object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
+object = "0.37.1"
 serde_json = "1.0.140"
 
 [features]

From 0c7a82c63410008b97450fa0acd2bffb25664894 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 04:05:41 -0500
Subject: [PATCH 106/133] ci: Use a mirror for musl

We fairly often see at least one CI job fail because it cannot pull the
musl git repo. Switch to the unofficial mirror [1], which should be more
reliable.

Link: https://github.com/kraj/musl [1]
---
 ci/update-musl.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/update-musl.sh b/ci/update-musl.sh
index b71cf5778..637ab1394 100755
--- a/ci/update-musl.sh
+++ b/ci/update-musl.sh
@@ -3,7 +3,7 @@
 
 set -eux
 
-url=git://git.musl-libc.org/musl
+url=https://github.com/kraj/musl.git
 ref=c47ad25ea3b484e10326f933e927c0bc8cded3da
 dst=crates/musl-math-sys/musl
 

From 3fa5a8cba55d4e8a0fce06897fd477d66b41da48 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 04:49:46 -0500
Subject: [PATCH 107/133] ci: Upgrade to ubuntu:25.04 for the PowerPC64LE test

Update the last remaining image.

For this to work, the `QEMU_CPU=POWER8` configuration needed to be
dropped to avoid a new SIGILL. Doing some debugging locally, the crash
comes from an `extswsli` (per `powerpc:common64` in gdb-multiarch) in
the `ld64.so` available with PowerPC, which qemu rejects when set to
power8. Testing a build with `+crt-static` hits the same issue at a
`maddld` in `__libc_start_main_impl`.

Rust isn't needed to reproduce this:

    $ cat a.c
    #include <stdio.h>

    int main() {
            printf("Hello, world!\n");
    }
    $ powerpc64le-linux-gnu-gcc a.c
    $ QEMU_CPU=power8 QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu/ ./a.out
    qemu: uncaught target signal 4 (Illegal instruction) - core dumped
    Illegal instruction

So the cross toolchain provided by Debian must have a power9 baseline
rather than rustc's power8. Alternatively, qemu may be incorrectly
rejecting these instructions (I can't find a source on whether or not
they should be available for power8). Testing instead with the `-musl`
toolchain and ppc linker from musl.cc works correctly.

In any case, things work with the default qemu config, so dropping
`QEMU_CPU` seems fine. The variable was originally added in 5d164a4edafb
("fix the powerpc64le target"), but whatever the problem was there
appears to no longer be relevant.
---
 ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
index c95adecf0..da1d56ca6 100644
--- a/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@@ -1,5 +1,4 @@
-# FIXME(ppc): We want 25.04 but get SIGILLs
-ARG IMAGE=ubuntu:24.04
+ARG IMAGE=ubuntu:25.04
 FROM $IMAGE
 
 RUN apt-get update && \
@@ -13,6 +12,5 @@ ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
     CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc64le-static \
     AR_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"ar \
     CC_powerpc64le_unknown_linux_gnu="$TOOLCHAIN_PREFIX"gcc \
-    QEMU_CPU=POWER8 \
     QEMU_LD_PREFIX=/usr/powerpc64le-linux-gnu \
     RUST_TEST_THREADS=1

From 9c4ec8b508d9fbc6e263ace2dee36af630cc5f6b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 01:18:46 -0500
Subject: [PATCH 108/133] Enable tests that were skipped on PowerPC

Most of these were skipped because of a bug with the platform
implementation, or some kind of crash unwinding. Since the upgrade to
Ubuntu 25.04, these all seem to be resolved with the exception of a bug
in the host `__floatundisf` [1].

[1] https://github.com/rust-lang/compiler-builtins/pull/384#issuecomment-740413334
---
 builtins-test-intrinsics/src/main.rs  | 84 +++++----------------------
 builtins-test/benches/float_conv.rs   |  9 ---
 builtins-test/benches/float_extend.rs |  2 -
 builtins-test/benches/float_trunc.rs  |  5 --
 builtins-test/src/bench.rs            | 11 ----
 builtins-test/tests/conv.rs           | 38 ++++++------
 crates/musl-math-sys/src/lib.rs       |  2 -
 libm/src/math/j1f.rs                  |  3 +-
 8 files changed, 34 insertions(+), 120 deletions(-)

diff --git a/builtins-test-intrinsics/src/main.rs b/builtins-test-intrinsics/src/main.rs
index 66744a081..b9d19ea77 100644
--- a/builtins-test-intrinsics/src/main.rs
+++ b/builtins-test-intrinsics/src/main.rs
@@ -40,11 +40,7 @@ mod intrinsics {
         x as f64
     }
 
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     pub fn extendhftf(x: f16) -> f128 {
         x as f128
     }
@@ -201,11 +197,7 @@ mod intrinsics {
 
     /* f128 operations */
 
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     pub fn trunctfhf(x: f128) -> f16 {
         x as f16
     }
@@ -220,50 +212,32 @@ mod intrinsics {
         x as f64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfsi(x: f128) -> i32 {
         x as i32
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfdi(x: f128) -> i64 {
         x as i64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixtfti(x: f128) -> i128 {
         x as i128
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfsi(x: f128) -> u32 {
         x as u32
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfdi(x: f128) -> u64 {
         x as u64
     }
 
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     pub fn fixunstfti(x: f128) -> u128 {
         x as u128
     }
@@ -540,47 +514,25 @@ fn run() {
     bb(extendhfdf(bb(2.)));
     #[cfg(f16_enabled)]
     bb(extendhfsf(bb(2.)));
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     bb(extendhftf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(extendsftf(bb(2.)));
     bb(fixdfti(bb(2.)));
     bb(fixsfti(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfdi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfsi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixtfti(bb(2.)));
     bb(fixunsdfti(bb(2.)));
     bb(fixunssfti(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfdi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfsi(bb(2.)));
-    #[cfg(all(
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(f128_enabled)]
     bb(fixunstfti(bb(2.)));
     #[cfg(f128_enabled)]
     bb(floatditf(bb(2)));
@@ -616,11 +568,7 @@ fn run() {
     bb(truncsfhf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(trunctfdf(bb(2.)));
-    #[cfg(all(
-        f16_enabled,
-        f128_enabled,
-        not(any(target_arch = "powerpc", target_arch = "powerpc64"))
-    ))]
+    #[cfg(all(f16_enabled, f128_enabled))]
     bb(trunctfhf(bb(2.)));
     #[cfg(f128_enabled)]
     bb(trunctfsf(bb(2.)));
diff --git a/builtins-test/benches/float_conv.rs b/builtins-test/benches/float_conv.rs
index d4a7346d1..e0f488eb6 100644
--- a/builtins-test/benches/float_conv.rs
+++ b/builtins-test/benches/float_conv.rs
@@ -365,7 +365,6 @@ float_bench! {
 
 /* float -> unsigned int */
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u32,
     sig: (a: f32) -> u32,
@@ -387,7 +386,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u64,
     sig: (a: f32) -> u64,
@@ -409,7 +407,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_u128,
     sig: (a: f32) -> u128,
@@ -505,7 +502,6 @@ float_bench! {
 
 /* float -> signed int */
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i32,
     sig: (a: f32) -> i32,
@@ -527,7 +523,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i64,
     sig: (a: f32) -> i64,
@@ -549,7 +544,6 @@ float_bench! {
     ],
 }
 
-#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
 float_bench! {
     name: conv_f32_i128,
     sig: (a: f32) -> i128,
@@ -666,9 +660,6 @@ pub fn float_conv() {
     conv_f64_i128(&mut criterion);
 
     #[cfg(f128_enabled)]
-    // FIXME: ppc64le has a sporadic overflow panic in the crate functions
-    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
     {
         conv_u32_f128(&mut criterion);
         conv_u64_f128(&mut criterion);
diff --git a/builtins-test/benches/float_extend.rs b/builtins-test/benches/float_extend.rs
index fc44e80c9..939dc60f9 100644
--- a/builtins-test/benches/float_extend.rs
+++ b/builtins-test/benches/float_extend.rs
@@ -110,9 +110,7 @@ float_bench! {
 pub fn float_extend() {
     let mut criterion = Criterion::default().configure_from_args();
 
-    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
     #[cfg(f16_enabled)]
-    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     {
         extend_f16_f32(&mut criterion);
         extend_f16_f64(&mut criterion);
diff --git a/builtins-test/benches/float_trunc.rs b/builtins-test/benches/float_trunc.rs
index 43310c7cf..9373f945b 100644
--- a/builtins-test/benches/float_trunc.rs
+++ b/builtins-test/benches/float_trunc.rs
@@ -121,9 +121,7 @@ float_bench! {
 pub fn float_trunc() {
     let mut criterion = Criterion::default().configure_from_args();
 
-    // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
     #[cfg(f16_enabled)]
-    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     {
         trunc_f32_f16(&mut criterion);
         trunc_f64_f16(&mut criterion);
@@ -133,11 +131,8 @@ pub fn float_trunc() {
 
     #[cfg(f128_enabled)]
     {
-        // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
         #[cfg(f16_enabled)]
-        #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
         trunc_f128_f16(&mut criterion);
-
         trunc_f128_f32(&mut criterion);
         trunc_f128_f64(&mut criterion);
     }
diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index 098718567..8a513ad67 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -23,11 +23,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         "mul_f64",
     ];
 
-    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
-    // in their benchmark modules due to runtime panics.
-    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];
-
     // FIXME(f16_f128): system symbols have incorrect results
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
     const X86_NO_SSE_SKIPPED: &[&str] = &[
@@ -57,12 +52,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
-    if cfg!(all(target_arch = "powerpc64", target_endian = "little"))
-        && PPC64LE_SKIPPED.contains(&test_name)
-    {
-        return true;
-    }
-
     if cfg!(all(target_arch = "x86", not(target_feature = "sse")))
         && X86_NO_SSE_SKIPPED.contains(&test_name)
     {
diff --git a/builtins-test/tests/conv.rs b/builtins-test/tests/conv.rs
index 7d729364f..9b04295d2 100644
--- a/builtins-test/tests/conv.rs
+++ b/builtins-test/tests/conv.rs
@@ -59,32 +59,28 @@ mod i_to_f {
                                 || ((error_minus == error || error_plus == error)
                                     && ((f0.to_bits() & 1) != 0))
                             {
-                                if !cfg!(any(
-                                    target_arch = "powerpc",
-                                    target_arch = "powerpc64"
-                                )) {
-                                    panic!(
-                                        "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
-                                        stringify!($fn),
-                                        x,
-                                        f1.to_bits(),
-                                        y_minus_ulp,
-                                        y,
-                                        y_plus_ulp,
-                                        error_minus,
-                                        error,
-                                        error_plus,
-                                    );
-                                }
+                                panic!(
+                                    "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
+                                    stringify!($fn),
+                                    x,
+                                    f1.to_bits(),
+                                    y_minus_ulp,
+                                    y,
+                                    y_plus_ulp,
+                                    error_minus,
+                                    error,
+                                    error_plus,
+                                );
                             }
                         }
 
-                        // Test against native conversion. We disable testing on all `x86` because of
-                        // rounding bugs with `i686`. `powerpc` also has the same rounding bug.
+                        // Test against native conversion.
+                        // FIXME(x86,ppc): the platform version has rounding bugs on i686 and
+                        // PowerPC64le (for PPC this only shows up in Docker, not the native runner).
+                        // https://github.com/rust-lang/compiler-builtins/pull/384#issuecomment-740413334
                         if !Float::eq_repr(f0, f1) && !cfg!(any(
                             target_arch = "x86",
-                            target_arch = "powerpc",
-                            target_arch = "powerpc64"
+                            all(target_arch = "powerpc64", target_endian = "little")
                         )) {
                             panic!(
                                 "{}({}): std: {:?}, builtins: {:?}",
diff --git a/crates/musl-math-sys/src/lib.rs b/crates/musl-math-sys/src/lib.rs
index 6a4bf4859..9cab8deef 100644
--- a/crates/musl-math-sys/src/lib.rs
+++ b/crates/musl-math-sys/src/lib.rs
@@ -40,8 +40,6 @@ macro_rules! functions {
     ) => {
         // Run a simple check to ensure we can link and call the function without crashing.
         #[test]
-        // FIXME(#309): LE PPC crashes calling some musl functions
-        #[cfg_attr(all(target_arch = "powerpc64", target_endian = "little"), ignore)]
         fn $name() {
             <fn($($aty),+) -> $rty>::check(super::$name);
         }
diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs
index a47472401..da5413ac2 100644
--- a/libm/src/math/j1f.rs
+++ b/libm/src/math/j1f.rs
@@ -361,8 +361,6 @@ fn qonef(x: f32) -> f32 {
     return (0.375 + r / s) / x;
 }
 
-// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
-#[cfg(not(target_arch = "powerpc64"))]
 #[cfg(test)]
 mod tests {
     use super::{j1f, y1f};
@@ -371,6 +369,7 @@ mod tests {
         // 0x401F3E49
         assert_eq!(j1f(2.4881766_f32), 0.49999475_f32);
     }
+
     #[test]
     fn test_y1f_2002() {
         //allow slightly different result on x87

From 82f9186e6baa0e3b4c0e51b35e0a79099ab456d2 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 07:28:38 -0500
Subject: [PATCH 109/133] Enable tests that were skipped on aarch64

The LLVM issue was resolved a while ago, so these should no longer be a
problem.
---
 builtins-test/src/bench.rs | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index 8a513ad67..9ba674294 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -29,11 +29,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
     ];
 
-    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
-    // uses `compiler-rt` version.
-    // <https://github.com/llvm/llvm-project/issues/91840>
-    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
-
     // FIXME(llvm): system symbols have incorrect results on Windows
     // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
     const WINDOWS_SKIPPED: &[&str] = &[
@@ -58,10 +53,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
-    if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) {
-        return true;
-    }
-
     if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) {
         return true;
     }

From 84507ccd6fc0f95082b8aaa1e199131bba1b7af8 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 07:31:49 -0500
Subject: [PATCH 110/133] Enable skipped `f32` and `f64` multiplication tests

The fix has since made it to nightly, so the skips here can be removed.
---
 builtins-test/src/bench.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index 9ba674294..bca9f8418 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -17,10 +17,6 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         "extend_f16_f32",
         "trunc_f32_f16",
         "trunc_f64_f16",
-        // FIXME(#616): re-enable once fix is in nightly
-        // <https://github.com/rust-lang/compiler-builtins/issues/616>
-        "mul_f32",
-        "mul_f64",
     ];
 
     // FIXME(f16_f128): system symbols have incorrect results

From 71f73a58b95784852370a95c4d5744e8d98f6522 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 24 Jul 2025 18:55:27 +0000
Subject: [PATCH 111/133] Use `x86_no_sse` configuration in more places

Emit `x86_no_sse` in the compiler-builtins (and builtins-test) build
script, and use it to simplify `all(target_arch = "x86",
not(target_feature = "sse"))` configuration.
---
 builtins-test/src/bench.rs       | 4 +---
 builtins-test/tests/addsub.rs    | 4 ++--
 builtins-test/tests/div_rem.rs   | 2 +-
 builtins-test/tests/float_pow.rs | 3 ++-
 builtins-test/tests/mul.rs       | 4 ++--
 compiler-builtins/build.rs       | 7 -------
 compiler-builtins/configure.rs   | 7 +++++++
 libm/src/math/rem_pio2.rs        | 2 +-
 8 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/builtins-test/src/bench.rs b/builtins-test/src/bench.rs
index bca9f8418..4bdcf482c 100644
--- a/builtins-test/src/bench.rs
+++ b/builtins-test/src/bench.rs
@@ -43,9 +43,7 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
-    if cfg!(all(target_arch = "x86", not(target_feature = "sse")))
-        && X86_NO_SSE_SKIPPED.contains(&test_name)
-    {
+    if cfg!(x86_no_sse) && X86_NO_SSE_SKIPPED.contains(&test_name) {
         return true;
     }
 
diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs
index 865b9e472..abe7dde64 100644
--- a/builtins-test/tests/addsub.rs
+++ b/builtins-test/tests/addsub.rs
@@ -111,7 +111,7 @@ macro_rules! float_sum {
     }
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_addsub {
     use super::*;
 
@@ -122,7 +122,7 @@ mod float_addsub {
 }
 
 #[cfg(f128_enabled)]
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 mod float_addsub_f128 {
     use super::*;
diff --git a/builtins-test/tests/div_rem.rs b/builtins-test/tests/div_rem.rs
index e8327f9b4..caee4166c 100644
--- a/builtins-test/tests/div_rem.rs
+++ b/builtins-test/tests/div_rem.rs
@@ -138,7 +138,7 @@ macro_rules! float {
     };
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_div {
     use super::*;
 
diff --git a/builtins-test/tests/float_pow.rs b/builtins-test/tests/float_pow.rs
index 0e8ae88e8..a17dff27c 100644
--- a/builtins-test/tests/float_pow.rs
+++ b/builtins-test/tests/float_pow.rs
@@ -1,7 +1,7 @@
 #![allow(unused_macros)]
 #![cfg_attr(f128_enabled, feature(f128))]
-#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
 
+#[cfg_attr(x86_no_sse, allow(unused))]
 use builtins_test::*;
 
 // This is approximate because of issues related to
@@ -52,6 +52,7 @@ macro_rules! pow {
     };
 }
 
+#[cfg(not(x86_no_sse))] // FIXME(i586): failure for powidf2
 pow! {
     f32, 1e-4, __powisf2, all();
     f64, 1e-12, __powidf2, all();
diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs
index 58bc9ab4a..3072b45dc 100644
--- a/builtins-test/tests/mul.rs
+++ b/builtins-test/tests/mul.rs
@@ -113,7 +113,7 @@ macro_rules! float_mul {
     };
 }
 
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 mod float_mul {
     use super::*;
 
@@ -126,7 +126,7 @@ mod float_mul {
 }
 
 #[cfg(f128_enabled)]
-#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+#[cfg(not(x86_no_sse))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 mod float_mul_f128 {
     use super::*;
diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs
index 8f51c12b5..43b978606 100644
--- a/compiler-builtins/build.rs
+++ b/compiler-builtins/build.rs
@@ -106,13 +106,6 @@ fn configure_libm(target: &Target) {
         println!("cargo:rustc-cfg=optimizations_enabled");
     }
 
-    // Config shorthands
-    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
-    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo:rustc-cfg=x86_no_sse");
-    }
-
     println!(
         "cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
         target.cargo_features
diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs
index 9721ddf09..caedc034d 100644
--- a/compiler-builtins/configure.rs
+++ b/compiler-builtins/configure.rs
@@ -100,6 +100,13 @@ pub fn configure_aliases(target: &Target) {
         println!("cargo:rustc-cfg=thumb_1")
     }
 
+    // Config shorthands
+    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
+    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
+        // Shorthand to detect i586 targets
+        println!("cargo:rustc-cfg=x86_no_sse");
+    }
+
     /* Not all backends support `f16` and `f128` to the same level on all architectures, so we
      * need to disable things if the compiler may crash. See configuration at:
      * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432
diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
index d677fd9dc..648dca170 100644
--- a/libm/src/math/rem_pio2.rs
+++ b/libm/src/math/rem_pio2.rs
@@ -195,7 +195,7 @@ mod tests {
 
     #[test]
     // FIXME(correctness): inaccurate results on i586
-    #[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)]
+    #[cfg_attr(x86_no_sse, ignore)]
     fn test_near_pi() {
         let arg = 3.141592025756836;
         let arg = force_eval!(arg);

From c22b8485289d1a01329df520ee4d34b0cd187f95 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 26 Jul 2025 16:51:58 -0500
Subject: [PATCH 112/133] libm: Update for new warn-by-default clippy lints

Silence the approximate constant lint because it is noisy and not always
correct. `single_component_path_imports` is also not accurate when built
as part of `compiler-builtins`, so that needs to be `allow`ed as well.
---
 libm/src/math/mod.rs         | 2 ++
 libm/src/math/support/mod.rs | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
index ce9b8fc58..8eecfe566 100644
--- a/libm/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::approx_constant)] // many false positives
+
 macro_rules! force_eval {
     ($e:expr) => {
         unsafe { ::core::ptr::read_volatile(&$e) }
diff --git a/libm/src/math/support/mod.rs b/libm/src/math/support/mod.rs
index 2e7edd03c..b2d7bd8d5 100644
--- a/libm/src/math/support/mod.rs
+++ b/libm/src/math/support/mod.rs
@@ -11,7 +11,8 @@ mod int_traits;
 
 #[allow(unused_imports)]
 pub use big::{i256, u256};
-#[allow(unused_imports)]
+// Clippy seems to have a false positive
+#[allow(unused_imports, clippy::single_component_path_imports)]
 pub(crate) use cfg_if;
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]

From c4966f9b0d4323183933e98dea4d0a77f52c0e0a Mon Sep 17 00:00:00 2001
From: quaternic <57393910+quaternic@users.noreply.github.com>
Date: Sun, 27 Jul 2025 08:26:58 +0300
Subject: [PATCH 113/133] Avoid inlining `floor` into `rem_pio2`

Possible workaround for
https://github.com/rust-lang/compiler-builtins/pull/976#issuecomment-3085530354

Inline assembly in the body of a function currently causes the compiler
to consider that function possibly unwinding, even if said asm
originated from inlining an `extern "C"` function. This patch wraps the
problematic callsite with `#[inline(never)]`.
---
 libm/src/math/rem_pio2_large.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs
index 6d679bbe9..792c09fb1 100644
--- a/libm/src/math/rem_pio2_large.rs
+++ b/libm/src/math/rem_pio2_large.rs
@@ -11,7 +11,7 @@
  * ====================================================
  */
 
-use super::{floor, scalbn};
+use super::scalbn;
 
 // initial value for jk
 const INIT_JK: [usize; 4] = [3, 4, 4, 6];
@@ -223,6 +223,14 @@ const PIO2: [f64; 8] = [
 /// independent of the exponent of the input.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 {
+    // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail
+    // on the callers of this function. As a workaround, avoid inlining `floor` here
+    // when implemented with assembly.
+    #[cfg_attr(x86_no_sse, inline(never))]
+    extern "C" fn floor(x: f64) -> f64 {
+        super::floor(x)
+    }
+
     let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
     let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
 

From a4f24dc2537e75661ca693acd9331c8d7f5a7750 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Sun, 27 Jul 2025 23:27:40 +0200
Subject: [PATCH 114/133] Implement `floor` and `ceil` in assembly on `i586`

Fixes: https://github.com/rust-lang/compiler-builtins/issues/837

The assembly is based on

- https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_floor.S
- https://github.com/NetBSD/src/blob/20433927938987dd64c8f6aa46904b7aca3fa39e/lib/libm/arch/i387/s_ceil.S

Which both state

    /*
     * Written by J.T. Conklin <jtc@NetBSD.org>.
     * Public domain.
     */

Which I believe means we're good in terms of licensing.
---
 libm-test/src/precision.rs | 22 ----------
 libm/src/math/arch/i586.rs | 85 ++++++++++++++++++++++++--------------
 2 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index 32825b15d..3fb8c1b37 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
 
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
-        if cfg!(x86_no_sse)
-            && ctx.base_name == BaseName::Ceil
-            && ctx.basis == CheckBasis::Musl
-            && input.0 < 0.0
-            && input.0 > -1.0
-            && expected == F::ZERO
-            && actual == F::ZERO
-        {
-            // musl returns -0.0, we return +0.0
-            return XFAIL("i586 ceil signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
             && (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL("i586 rint rounding mode");
         }
 
-        if cfg!(x86_no_sse)
-            && (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
-            && expected.eq_repr(F::NEG_ZERO)
-            && actual.eq_repr(F::ZERO)
-        {
-            // FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
-            // See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
-            return XFAIL("i586 ceil/floor signed zero");
-        }
-
         if cfg!(x86_no_sse)
             && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
         {
diff --git a/libm/src/math/arch/i586.rs b/libm/src/math/arch/i586.rs
index f92b9a2af..b9a667620 100644
--- a/libm/src/math/arch/i586.rs
+++ b/libm/src/math/arch/i586.rs
@@ -1,37 +1,62 @@
 //! Architecture-specific support for x86-32 without SSE2
+//!
+//! We use an alternative implementation on x86, because the
+//! main implementation fails with the x87 FPU used by
+//! debian i386, probably due to excess precision issues.
+//!
+//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
+//! functions are implemented in this way.
 
-use super::super::fabs;
-
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn ceil(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated < x {
-            return truncated + 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn ceil(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b10 (+∞).
+            "mov word ptr [{x} + 2], 0x0b7f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }
 
-/// Use an alternative implementation on x86, because the
-/// main implementation fails with the x87 FPU used by
-/// debian i386, probably due to excess precision issues.
-/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
-pub fn floor(x: f64) -> f64 {
-    if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
-        let truncated = x as i64 as f64;
-        if truncated > x {
-            return truncated - 1.0;
-        } else {
-            return truncated;
-        }
-    } else {
-        return x;
+pub fn floor(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b01 (-∞).
+            "mov word ptr [{x} + 2], 0x077f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
     }
+    x
 }

From b7cdb7334d4eb4dac6adf5b97819bae8911c4ab0 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 25 Jul 2025 17:36:25 -0500
Subject: [PATCH 115/133] Remove `no-asm` gating when there is no alternative
 implementation

Assembly-related configuration was added in 1621c6dbf9eb ("Use
`specialized-div-rem` 1.0.0 for division algorithms") to account for
Cranelift not yet supporting assembly. This hasn't been relevant for a
while, so we no longer need to gate `asm!` behind this configuration.
Thus, remove `cfg(not(feature = "no-asm"))` in places where there is no
generic fallback.

There are other cases, however, where setting the `no-asm` configuration
enables testing of the generic versions of builtins when there are platform-
specific implementations available; these cases are left unchanged. This
could be improved in the future by exposing both versions for testing
rather than using a configuration and running the entire testsuite
twice.

This is the compiler-builtins portion of
https://github.com/rust-lang/rust/pull/144471.
---
 builtins-shim/Cargo.toml            |  5 +++--
 builtins-test/tests/lse.rs          |  2 +-
 compiler-builtins/Cargo.toml        |  5 +++--
 compiler-builtins/src/aarch64.rs    |  2 +-
 compiler-builtins/src/arm.rs        |  2 --
 compiler-builtins/src/hexagon.rs    |  2 --
 compiler-builtins/src/lib.rs        |  2 +-
 compiler-builtins/src/probestack.rs |  2 --
 compiler-builtins/src/x86.rs        | 10 ++--------
 compiler-builtins/src/x86_64.rs     |  9 +--------
 10 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/builtins-shim/Cargo.toml b/builtins-shim/Cargo.toml
index 8eb880c6f..707ebdbc7 100644
--- a/builtins-shim/Cargo.toml
+++ b/builtins-shim/Cargo.toml
@@ -37,8 +37,9 @@ default = ["compiler-builtins"]
 # implementations and also filling in unimplemented intrinsics
 c = ["dep:cc"]
 
-# Workaround for the Cranelift codegen backend. Disables any implementations
-# which use inline assembly and fall back to pure Rust versions (if available).
+# For implementations where there is both a generic version and a platform-
+# specific version, use the generic version. This is meant to enable testing
+# the generic versions on all platforms.
 no-asm = []
 
 # Workaround for codegen backends which haven't yet implemented `f16` and
diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs
index 0d85228d7..5d59fbb7f 100644
--- a/builtins-test/tests/lse.rs
+++ b/builtins-test/tests/lse.rs
@@ -1,6 +1,6 @@
 #![feature(decl_macro)] // so we can use pub(super)
 #![feature(macro_metavar_expr_concat)]
-#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))]
+#![cfg(all(target_arch = "aarch64", target_os = "linux"))]
 
 /// Translate a byte size to a Rust type.
 macro int_ty {
diff --git a/compiler-builtins/Cargo.toml b/compiler-builtins/Cargo.toml
index 3ccb05f73..8bbe136ce 100644
--- a/compiler-builtins/Cargo.toml
+++ b/compiler-builtins/Cargo.toml
@@ -35,8 +35,9 @@ default = ["compiler-builtins"]
 # implementations and also filling in unimplemented intrinsics
 c = ["dep:cc"]
 
-# Workaround for the Cranelift codegen backend. Disables any implementations
-# which use inline assembly and fall back to pure Rust versions (if available).
+# For implementations where there is both a generic version and a platform-
+# specific version, use the generic version. This is meant to enable testing
+# the generic versions on all platforms.
 no-asm = []
 
 # Workaround for codegen backends which haven't yet implemented `f16` and
diff --git a/compiler-builtins/src/aarch64.rs b/compiler-builtins/src/aarch64.rs
index a72b30d29..039fab206 100644
--- a/compiler-builtins/src/aarch64.rs
+++ b/compiler-builtins/src/aarch64.rs
@@ -4,7 +4,7 @@ use core::intrinsics;
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(target_os = "uefi", not(feature = "no-asm")))]
+    #[cfg(target_os = "uefi")]
     pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
             ".p2align 2",
diff --git a/compiler-builtins/src/arm.rs b/compiler-builtins/src/arm.rs
index fbec93ca4..0c15b37df 100644
--- a/compiler-builtins/src/arm.rs
+++ b/compiler-builtins/src/arm.rs
@@ -1,5 +1,3 @@
-#![cfg(not(feature = "no-asm"))]
-
 // Interfaces used by naked trampolines.
 // SAFETY: these are defined in compiler-builtins
 unsafe extern "C" {
diff --git a/compiler-builtins/src/hexagon.rs b/compiler-builtins/src/hexagon.rs
index 91cf91c31..a5c7b4dfd 100644
--- a/compiler-builtins/src/hexagon.rs
+++ b/compiler-builtins/src/hexagon.rs
@@ -1,5 +1,3 @@
-#![cfg(not(feature = "no-asm"))]
-
 use core::arch::global_asm;
 
 global_asm!(include_str!("hexagon/func_macro.s"), options(raw));
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index fe0ad81dd..ca75f44e0 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -60,7 +60,7 @@ pub mod arm;
 #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
 pub mod aarch64;
 
-#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))]
+#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
 pub mod aarch64_linux;
 
 #[cfg(all(
diff --git a/compiler-builtins/src/probestack.rs b/compiler-builtins/src/probestack.rs
index f4105dde5..9a18216da 100644
--- a/compiler-builtins/src/probestack.rs
+++ b/compiler-builtins/src/probestack.rs
@@ -44,8 +44,6 @@
 #![cfg(not(feature = "mangled-names"))]
 // Windows and Cygwin already has builtins to do this.
 #![cfg(not(any(windows, target_os = "cygwin")))]
-// All these builtins require assembly
-#![cfg(not(feature = "no-asm"))]
 // We only define stack probing for these architectures today.
 #![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
 
diff --git a/compiler-builtins/src/x86.rs b/compiler-builtins/src/x86.rs
index 16e50922a..51940b3b3 100644
--- a/compiler-builtins/src/x86.rs
+++ b/compiler-builtins/src/x86.rs
@@ -9,10 +9,7 @@ use core::intrinsics;
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(
-        any(all(windows, target_env = "gnu"), target_os = "uefi"),
-        not(feature = "no-asm")
-    ))]
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))]
     pub unsafe extern "custom" fn __chkstk() {
         core::arch::naked_asm!(
             "jmp {}", // Jump to __alloca since fallthrough may be unreliable"
@@ -21,10 +18,7 @@ intrinsics! {
     }
 
     #[unsafe(naked)]
-    #[cfg(all(
-        any(all(windows, target_env = "gnu"), target_os = "uefi"),
-        not(feature = "no-asm")
-    ))]
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "uefi"))]
     pub unsafe extern "custom" fn _alloca() {
         // __chkstk and _alloca are the same function
         core::arch::naked_asm!(
diff --git a/compiler-builtins/src/x86_64.rs b/compiler-builtins/src/x86_64.rs
index 9b7133b48..f9ae784d5 100644
--- a/compiler-builtins/src/x86_64.rs
+++ b/compiler-builtins/src/x86_64.rs
@@ -9,14 +9,7 @@ use core::intrinsics;
 
 intrinsics! {
     #[unsafe(naked)]
-    #[cfg(all(
-        any(
-            all(windows, target_env = "gnu"),
-            target_os = "cygwin",
-            target_os = "uefi"
-        ),
-        not(feature = "no-asm")
-    ))]
+    #[cfg(any(all(windows, target_env = "gnu"), target_os = "cygwin", target_os = "uefi"))]
     pub unsafe extern "custom" fn ___chkstk_ms() {
         core::arch::naked_asm!(
             "push   %rcx",

From b56560b9696cc394e6030fa2e35ebc552c7e0962 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Tue, 29 Jul 2025 10:20:22 +0200
Subject: [PATCH 116/133] Switch to using a GH app for authenticating sync PRs

So there will no longer be the need to close and reopen sync PRs in
order for CI to run.
---
 .github/workflows/rustc-pull.yml | 5 +++--
 triagebot.toml                   | 3 ---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml
index ba698492e..ad7693e17 100644
--- a/.github/workflows/rustc-pull.yml
+++ b/.github/workflows/rustc-pull.yml
@@ -12,12 +12,13 @@ jobs:
     if: github.repository == 'rust-lang/compiler-builtins'
     uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
     with:
+      github-app-id: ${{ vars.APP_CLIENT_ID }}
       # https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375
       zulip-stream-id: 219381
       zulip-topic: 'compiler-builtins subtree sync automation'
-      zulip-bot-email:  "compiler-builtins-ci-bot@rust-lang.zulipchat.com"
+      zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com"
       pr-base-branch: master
       branch-name: rustc-pull
     secrets:
       zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
-      token: ${{ secrets.GITHUB_TOKEN }}
+      github-app-secret: ${{ secrets.APP_PRIVATE_KEY }}
diff --git a/triagebot.toml b/triagebot.toml
index 8a2356c2b..eba5cdd88 100644
--- a/triagebot.toml
+++ b/triagebot.toml
@@ -19,6 +19,3 @@ check-commits = false
 # Enable issue transfers within the org
 # Documentation at: https://forge.rust-lang.org/triagebot/transfer.html
 [transfer]
-
-# Automatically close and reopen PRs made by bots to run CI on them
-[bot-pull-requests]

From 2086325b3171988b5ca8c0b8298bb3abb10a1bb9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Jul 2025 18:56:46 +0000
Subject: [PATCH 117/133] cleanup: Trim trailing whitespace

---
 .github/workflows/main.yaml | 4 ++--
 ci/run.sh                   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 6c98a60d2..0c4b49cd9 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -166,7 +166,7 @@ jobs:
       shell: bash
     - run: echo "RUST_COMPILER_RT_ROOT=$(realpath ./compiler-rt)" >> "$GITHUB_ENV"
       shell: bash
-      
+
     - name: Download musl source
       run: ./ci/update-musl.sh
       shell: bash
@@ -278,7 +278,7 @@ jobs:
       with:
         name: ${{ env.BASELINE_NAME }}
         path: ${{ env.BASELINE_NAME }}.tar.xz
-    
+
     - name: Run wall time benchmarks
       run: |
         # Always use the same seed for benchmarks. Ideally we should switch to a
diff --git a/ci/run.sh b/ci/run.sh
index 8b7965bb2..4b43536d3 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -161,7 +161,7 @@ else
     mflags+=(--workspace --target "$target")
     cmd=(cargo test "${mflags[@]}")
     profile_flag="--profile"
-    
+
     # If nextest is available, use that
     command -v cargo-nextest && nextest=1 || nextest=0
     if [ "$nextest" = "1" ]; then
@@ -204,7 +204,7 @@ else
     "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches
 
     # Ensure that the routines do not panic.
-    # 
+    #
     # `--tests` must be passed because no-panic is only enabled as a dev
     # dependency. The `release-opt` profile must be used to enable LTO and a
     # single CGU.

From 16d9435403c92755ed6c4bbb38db0ab25cbdef51 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 29 Jul 2025 19:04:32 +0000
Subject: [PATCH 118/133] ci: Simplify tests for verbatim paths

Rather than setting an environment variable in the workflow job based on
whether or not the environment is non-MinGW Windows, we can just check
this in the ci script.

This was originally added in b0f19660f0 ("Add tests for UNC paths on
windows builds") and its followup commits.
---
 .github/workflows/main.yaml | 4 ----
 ci/run.sh                   | 5 ++++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 0c4b49cd9..94b519e3c 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -50,7 +50,6 @@ jobs:
           os: ubuntu-24.04-arm
         - target: aarch64-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
           build_only: 1
         - target: arm-unknown-linux-gnueabi
           os: ubuntu-24.04
@@ -92,10 +91,8 @@ jobs:
           os: macos-13
         - target: i686-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
         - target: x86_64-pc-windows-msvc
           os: windows-2025
-          test_verbatim: 1
         - target: i686-pc-windows-gnu
           os: windows-2025
           channel: nightly-i686-gnu
@@ -106,7 +103,6 @@ jobs:
     needs: [calculate_vars]
     env:
       BUILD_ONLY: ${{ matrix.build_only }}
-      TEST_VERBATIM: ${{ matrix.test_verbatim }}
       MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
     steps:
     - name: Print $HOME
diff --git a/ci/run.sh b/ci/run.sh
index 4b43536d3..bc94d42fe 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -41,7 +41,10 @@ else
     "${test_builtins[@]}" --benches
     "${test_builtins[@]}" --benches --release
 
-    if [ "${TEST_VERBATIM:-}" = "1" ]; then
+    # Validate that having a verbatim path for the target directory works
+    # (trivial to regress using `/` in paths to build artifacts rather than
+    # `Path::join`). MinGW does not currently support these paths.
+    if [[ "$target" = *"windows"* ]] && [[ "$target" != *"gnu"* ]]; then
         verb_path=$(cmd.exe //C echo \\\\?\\%cd%\\builtins-test\\target2)
         "${test_builtins[@]}" --target-dir "$verb_path" --features c
     fi

From aa25c33bf42c483998c9307ba2b7416be168fccf Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 08:26:59 +0000
Subject: [PATCH 119/133] ci: Switch to strongly typed directives

Replace the ad-hoc string matching on the PR body with a `PrCfg`
dataclass, which is more structured and will also catch unknown
directives.
---
 ci/ci-util.py | 79 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 3437d304f..1a9c83d23 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -7,6 +7,7 @@
 
 import json
 import os
+import pprint
 import re
 import subprocess as sp
 import sys
@@ -50,15 +51,6 @@
 DEFAULT_BRANCH = "master"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
 ARTIFACT_PREFIX = "baseline-icount*"
-# Place this in a PR body to skip regression checks (must be at the start of a line).
-REGRESSION_DIRECTIVE = "ci: allow-regressions"
-# Place this in a PR body to skip extensive tests
-SKIP_EXTENSIVE_DIRECTIVE = "ci: skip-extensive"
-# Place this in a PR body to allow running a large number of extensive tests. If not
-# set, this script will error out if a threshold is exceeded in order to avoid
-# accidentally spending huge amounts of CI time.
-ALLOW_MANY_EXTENSIVE_DIRECTIVE = "ci: allow-many-extensive"
-MANY_EXTENSIVE_THRESHOLD = 20
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -80,6 +72,48 @@ def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
 
+@dataclass(init=False)
+class PrCfg:
+    """Directives that we allow in the commit body to control test behavior.
+
+    These are of the form `ci: foo`, at the start of a line.
+    """
+
+    # Skip regression checks (must be at the start of a line).
+    allow_regressions: bool = False
+    # Don't run extensive tests
+    skip_extensive: bool = False
+
+    # Allow running a large number of extensive tests. If not set, this script
+    # will error out if a threshold is exceeded in order to avoid accidentally
+    # spending huge amounts of CI time.
+    allow_many_extensive: bool = False
+
+    # Max number of extensive tests to run by default
+    MANY_EXTENSIVE_THRESHOLD: int = 20
+
+    # String values of directive names
+    DIR_ALLOW_REGRESSIONS: str = "allow-regressions"
+    DIR_SKIP_EXTENSIVE: str = "skip-extensive"
+    DIR_ALLOW_MANY_EXTENSIVE: str = "allow-many-extensive"
+
+    def __init__(self, body: str):
+        directives = re.finditer(r"^\s*ci:\s*(?P<dir_name>\S*)", body, re.MULTILINE)
+        for dir in directives:
+            name = dir.group("dir_name")
+            if name == self.DIR_ALLOW_REGRESSIONS:
+                self.allow_regressions = True
+            elif name == self.DIR_SKIP_EXTENSIVE:
+                self.skip_extensive = True
+            elif name == self.DIR_ALLOW_MANY_EXTENSIVE:
+                self.allow_many_extensive = True
+            else:
+                eprint(f"Found unexpected directive `{name}`")
+                exit(1)
+
+        pprint.pp(self)
+
+
 @dataclass
 class PrInfo:
     """GitHub response for PR query"""
@@ -88,6 +122,7 @@ class PrInfo:
     commits: list[str]
     created_at: str
     number: int
+    cfg: PrCfg
 
     @classmethod
     def load(cls, pr_number: int | str) -> Self:
@@ -104,13 +139,9 @@ def load(cls, pr_number: int | str) -> Self:
             ],
             text=True,
         )
-        eprint("PR info:", json.dumps(pr_info, indent=4))
-        return cls(**json.loads(pr_info))
-
-    def contains_directive(self, directive: str) -> bool:
-        """Return true if the provided directive is on a line in the PR body"""
-        lines = self.body.splitlines()
-        return any(line.startswith(directive) for line in lines)
+        pr_json = json.loads(pr_info)
+        eprint("PR info:", json.dumps(pr_json, indent=4))
+        return cls(**json.loads(pr_info), cfg=PrCfg(pr_json["body"]))
 
 
 class FunctionDef(TypedDict):
@@ -223,10 +254,8 @@ def emit_workflow_output(self):
 
         if pr_number is not None and len(pr_number) > 0:
             pr = PrInfo.load(pr_number)
-            skip_tests = pr.contains_directive(SKIP_EXTENSIVE_DIRECTIVE)
-            error_on_many_tests = not pr.contains_directive(
-                ALLOW_MANY_EXTENSIVE_DIRECTIVE
-            )
+            skip_tests = pr.cfg.skip_extensive
+            error_on_many_tests = not pr.cfg.allow_many_extensive
 
             if skip_tests:
                 eprint("Skipping all extensive tests")
@@ -257,12 +286,12 @@ def emit_workflow_output(self):
         eprint(f"may_skip_libm_ci={may_skip}")
         eprint(f"total extensive tests: {total_to_test}")
 
-        if error_on_many_tests and total_to_test > MANY_EXTENSIVE_THRESHOLD:
+        if error_on_many_tests and total_to_test > PrCfg.MANY_EXTENSIVE_THRESHOLD:
             eprint(
-                f"More than {MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
-                f" `{ALLOW_MANY_EXTENSIVE_DIRECTIVE}` to the PR body if this is"
+                f"More than {PrCfg.MANY_EXTENSIVE_THRESHOLD} tests would be run; add"
+                f" `{PrCfg.DIR_ALLOW_MANY_EXTENSIVE}` to the PR body if this is"
                 " intentional. If this is refactoring that happens to touch a lot of"
-                f" files, `{SKIP_EXTENSIVE_DIRECTIVE}` can be used instead."
+                f" files, `{PrCfg.DIR_SKIP_EXTENSIVE}` can be used instead."
             )
             exit(1)
 
@@ -372,7 +401,7 @@ def handle_bench_regressions(args: list[str]):
             exit(1)
 
     pr = PrInfo.load(pr_number)
-    if pr.contains_directive(REGRESSION_DIRECTIVE):
+    if pr.cfg.allow_regressions:
         eprint("PR allows regressions")
         return
 

From ff2cc0e38e3ecc59e617ec75856b3f702bb46dea Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 08:30:47 +0000
Subject: [PATCH 120/133] ci: Don't print output twice in `ci-util`

Use `tee` rather than printing to both stdout and stderr.
---
 .github/workflows/main.yaml | 2 +-
 ci/ci-util.py               | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 94b519e3c..939bc34c2 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -34,7 +34,7 @@ jobs:
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
         if: github.event_name == 'pull_request'
-      - run: python3 ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
+      - run: set -e; python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT"
         id: script
 
   test:
diff --git a/ci/ci-util.py b/ci/ci-util.py
index 1a9c83d23..8f74ecfdb 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -282,8 +282,6 @@ def emit_workflow_output(self):
         may_skip = str(self.may_skip_libm_ci()).lower()
         print(f"extensive_matrix={ext_matrix}")
         print(f"may_skip_libm_ci={may_skip}")
-        eprint(f"extensive_matrix={ext_matrix}")
-        eprint(f"may_skip_libm_ci={may_skip}")
         eprint(f"total extensive tests: {total_to_test}")
 
         if error_on_many_tests and total_to_test > PrCfg.MANY_EXTENSIVE_THRESHOLD:

From 568afb8cf55a6a8e5645f9d21aac6139e683ec42 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 08:32:28 +0000
Subject: [PATCH 121/133] ci: Commonize the way `PrInfo` is loaded from env

---
 ci/ci-util.py | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 8f74ecfdb..f43409c5e 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -12,6 +12,7 @@
 import subprocess as sp
 import sys
 from dataclasses import dataclass
+from functools import cache
 from glob import glob
 from inspect import cleandoc
 from os import getenv
@@ -62,7 +63,7 @@
 
 # libm PR CI takes a long time and doesn't need to run unless relevant files have been
 # changed. Anything matching this regex pattern will trigger a run.
-TRIGGER_LIBM_PR_CI = ".*(libm|musl).*"
+TRIGGER_LIBM_CI_FILE_PAT = ".*(libm|musl).*"
 
 TYPES = ["f16", "f32", "f64", "f128"]
 
@@ -125,8 +126,18 @@ class PrInfo:
     cfg: PrCfg
 
     @classmethod
-    def load(cls, pr_number: int | str) -> Self:
-        """For a given PR number, query the body and commit list"""
+    def from_env(cls) -> Self | None:
+        """Create a PR object from the PR_NUMBER environment if set, `None` otherwise."""
+        pr_env = os.environ.get("PR_NUMBER")
+        if pr_env is not None and len(pr_env) > 0:
+            return cls.from_pr(pr_env)
+
+        return None
+
+    @classmethod
+    @cache  # Cache so we don't print info messages multiple times
+    def from_pr(cls, pr_number: int | str) -> Self:
+        """For a given PR number, query the body and commit list."""
         pr_info = sp.check_output(
             [
                 "gh",
@@ -238,22 +249,23 @@ def may_skip_libm_ci(self) -> bool:
         """If this is a PR and no libm files were changed, allow skipping libm
         jobs."""
 
-        if self.is_pr():
-            return all(not re.match(TRIGGER_LIBM_PR_CI, str(f)) for f in self.changed)
+        # Always run on merge CI
+        if not self.is_pr():
+            return False
 
-        return False
+        # By default, run if there are any changed files matching the pattern
+        return all(not re.match(TRIGGER_LIBM_CI_FILE_PAT, str(f)) for f in self.changed)
 
     def emit_workflow_output(self):
         """Create a JSON object a list items for each type's changed files, if any
         did change, and the routines that were affected by the change.
         """
 
-        pr_number = os.environ.get("PR_NUMBER")
         skip_tests = False
         error_on_many_tests = False
 
-        if pr_number is not None and len(pr_number) > 0:
-            pr = PrInfo.load(pr_number)
+        pr = PrInfo.from_env()
+        if pr is not None:
             skip_tests = pr.cfg.skip_extensive
             error_on_many_tests = not pr.cfg.allow_many_extensive
 
@@ -398,7 +410,7 @@ def handle_bench_regressions(args: list[str]):
             eprint(USAGE)
             exit(1)
 
-    pr = PrInfo.load(pr_number)
+    pr = PrInfo.from_pr(pr_number)
     if pr.cfg.allow_regressions:
         eprint("PR allows regressions")
         return

From 1d58d4c778b9e8632bb1649d84becaa5a7a53e03 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 08:33:37 +0000
Subject: [PATCH 122/133] ci: Add a way to run `libm` tests that would
 otherwise be skipped

Introduce a new directive `ci: test-libm` to ensure tests run.
---
 ci/ci-util.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index f43409c5e..c1db17c6c 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -93,10 +93,14 @@ class PrCfg:
     # Max number of extensive tests to run by default
     MANY_EXTENSIVE_THRESHOLD: int = 20
 
+    # Run tests for `libm` that may otherwise be skipped due to no changed files.
+    always_test_libm: bool = False
+
     # String values of directive names
     DIR_ALLOW_REGRESSIONS: str = "allow-regressions"
     DIR_SKIP_EXTENSIVE: str = "skip-extensive"
     DIR_ALLOW_MANY_EXTENSIVE: str = "allow-many-extensive"
+    DIR_TEST_LIBM: str = "test-libm"
 
     def __init__(self, body: str):
         directives = re.finditer(r"^\s*ci:\s*(?P<dir_name>\S*)", body, re.MULTILINE)
@@ -108,6 +112,8 @@ def __init__(self, body: str):
                 self.skip_extensive = True
             elif name == self.DIR_ALLOW_MANY_EXTENSIVE:
                 self.allow_many_extensive = True
+            elif name == self.DIR_TEST_LIBM:
+                self.always_test_libm = True
             else:
                 eprint(f"Found unexpected directive `{name}`")
                 exit(1)
@@ -253,6 +259,13 @@ def may_skip_libm_ci(self) -> bool:
         if not self.is_pr():
             return False
 
+        pr = PrInfo.from_env()
+        assert pr is not None, "Is a PR but couldn't load PrInfo"
+
+        # Allow opting in to libm tests
+        if pr.cfg.always_test_libm:
+            return False
+
         # By default, run if there are any changed files matching the pattern
         return all(not re.match(TRIGGER_LIBM_CI_FILE_PAT, str(f)) for f in self.changed)
 

From 767e6ebff0f39e89adbffb96350d03eb6b3225d4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 09:56:11 -0500
Subject: [PATCH 123/133] ci: Set pipefail before running ci-util

Currently, a failure in `ci-util.py` does not cause the job to fail
because the pipe eats the failure status. Set pipefail to fix this.

Fixes: ff2cc0e38e3e ("ci: Don't print output twice in `ci-util`")
---
 .github/workflows/main.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 939bc34c2..c54df2e90 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -34,7 +34,9 @@ jobs:
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
         if: github.event_name == 'pull_request'
-      - run: set -e; python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT"
+      - run: |
+          set -eo pipefail # Needed to actually fail the job if ci-util fails
+          python3 ci/ci-util.py generate-matrix | tee "$GITHUB_OUTPUT"
         id: script
 
   test:

From 13c5374b7cd374583f4d2df7cd014a2ede9e0570 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Wed, 30 Jul 2025 09:45:53 -0500
Subject: [PATCH 124/133] Simplify the configuration for no-panic

Currently, attributes for `no-panic` are gated behind both the `test`
config and `assert_no_panic`, because `no-panic` is a dev dependency (so
only available with test configuration). However, we only emit
`assert_no_panic` when the test config is also set anyway, so there
isn't any need to gate on both.

Replace gates on `all(test, assert_no_panic)` with only
`assert_no_panic`. This is simpler, and also has the benefit that
attempting to check for panics without `--test` results in an error.
---
 libm/src/math/acos.rs                  |  2 +-
 libm/src/math/acosf.rs                 |  2 +-
 libm/src/math/acosh.rs                 |  2 +-
 libm/src/math/acoshf.rs                |  2 +-
 libm/src/math/asin.rs                  |  2 +-
 libm/src/math/asinf.rs                 |  2 +-
 libm/src/math/asinh.rs                 |  2 +-
 libm/src/math/asinhf.rs                |  2 +-
 libm/src/math/atan.rs                  |  2 +-
 libm/src/math/atan2.rs                 |  2 +-
 libm/src/math/atan2f.rs                |  2 +-
 libm/src/math/atanf.rs                 |  2 +-
 libm/src/math/atanh.rs                 |  2 +-
 libm/src/math/atanhf.rs                |  2 +-
 libm/src/math/cbrt.rs                  |  2 +-
 libm/src/math/cbrtf.rs                 |  2 +-
 libm/src/math/ceil.rs                  |  8 ++++----
 libm/src/math/copysign.rs              |  8 ++++----
 libm/src/math/cos.rs                   |  2 +-
 libm/src/math/cosf.rs                  |  2 +-
 libm/src/math/cosh.rs                  |  2 +-
 libm/src/math/coshf.rs                 |  2 +-
 libm/src/math/erf.rs                   |  2 +-
 libm/src/math/erff.rs                  |  2 +-
 libm/src/math/exp.rs                   |  2 +-
 libm/src/math/exp10.rs                 |  2 +-
 libm/src/math/exp10f.rs                |  2 +-
 libm/src/math/exp2.rs                  |  2 +-
 libm/src/math/exp2f.rs                 |  2 +-
 libm/src/math/expf.rs                  |  2 +-
 libm/src/math/expm1.rs                 |  2 +-
 libm/src/math/expm1f.rs                |  2 +-
 libm/src/math/expo2.rs                 |  2 +-
 libm/src/math/fabs.rs                  |  8 ++++----
 libm/src/math/fdim.rs                  |  8 ++++----
 libm/src/math/floor.rs                 |  8 ++++----
 libm/src/math/fma.rs                   |  8 ++++----
 libm/src/math/fmin_fmax.rs             | 16 ++++++++--------
 libm/src/math/fminimum_fmaximum.rs     | 16 ++++++++--------
 libm/src/math/fminimum_fmaximum_num.rs | 16 ++++++++--------
 libm/src/math/fmod.rs                  |  8 ++++----
 libm/src/math/frexp.rs                 |  2 +-
 libm/src/math/frexpf.rs                |  2 +-
 libm/src/math/hypot.rs                 |  2 +-
 libm/src/math/hypotf.rs                |  2 +-
 libm/src/math/ilogb.rs                 |  2 +-
 libm/src/math/ilogbf.rs                |  2 +-
 libm/src/math/j0.rs                    |  4 ++--
 libm/src/math/j0f.rs                   |  4 ++--
 libm/src/math/j1.rs                    |  4 ++--
 libm/src/math/j1f.rs                   |  4 ++--
 libm/src/math/jn.rs                    |  4 ++--
 libm/src/math/jnf.rs                   |  4 ++--
 libm/src/math/k_cos.rs                 |  2 +-
 libm/src/math/k_cosf.rs                |  2 +-
 libm/src/math/k_expo2.rs               |  2 +-
 libm/src/math/k_expo2f.rs              |  2 +-
 libm/src/math/k_sin.rs                 |  2 +-
 libm/src/math/k_sinf.rs                |  2 +-
 libm/src/math/k_tan.rs                 |  2 +-
 libm/src/math/k_tanf.rs                |  2 +-
 libm/src/math/ldexp.rs                 |  8 ++++----
 libm/src/math/lgamma.rs                |  2 +-
 libm/src/math/lgamma_r.rs              |  2 +-
 libm/src/math/lgammaf.rs               |  2 +-
 libm/src/math/lgammaf_r.rs             |  2 +-
 libm/src/math/log.rs                   |  2 +-
 libm/src/math/log10.rs                 |  2 +-
 libm/src/math/log10f.rs                |  2 +-
 libm/src/math/log1p.rs                 |  2 +-
 libm/src/math/log1pf.rs                |  2 +-
 libm/src/math/log2.rs                  |  2 +-
 libm/src/math/log2f.rs                 |  2 +-
 libm/src/math/logf.rs                  |  2 +-
 libm/src/math/modf.rs                  |  2 +-
 libm/src/math/modff.rs                 |  2 +-
 libm/src/math/nextafter.rs             |  2 +-
 libm/src/math/nextafterf.rs            |  2 +-
 libm/src/math/pow.rs                   |  2 +-
 libm/src/math/powf.rs                  |  2 +-
 libm/src/math/rem_pio2.rs              |  2 +-
 libm/src/math/rem_pio2_large.rs        |  2 +-
 libm/src/math/rem_pio2f.rs             |  2 +-
 libm/src/math/remainder.rs             |  2 +-
 libm/src/math/remainderf.rs            |  2 +-
 libm/src/math/remquo.rs                |  2 +-
 libm/src/math/remquof.rs               |  2 +-
 libm/src/math/rint.rs                  |  8 ++++----
 libm/src/math/round.rs                 |  8 ++++----
 libm/src/math/roundeven.rs             |  8 ++++----
 libm/src/math/scalbn.rs                |  8 ++++----
 libm/src/math/sin.rs                   |  2 +-
 libm/src/math/sincos.rs                |  2 +-
 libm/src/math/sincosf.rs               |  2 +-
 libm/src/math/sinf.rs                  |  2 +-
 libm/src/math/sinh.rs                  |  2 +-
 libm/src/math/sinhf.rs                 |  2 +-
 libm/src/math/sqrt.rs                  |  8 ++++----
 libm/src/math/tan.rs                   |  2 +-
 libm/src/math/tanf.rs                  |  2 +-
 libm/src/math/tanh.rs                  |  2 +-
 libm/src/math/tanhf.rs                 |  2 +-
 libm/src/math/tgamma.rs                |  2 +-
 libm/src/math/tgammaf.rs               |  2 +-
 libm/src/math/trunc.rs                 |  8 ++++----
 105 files changed, 174 insertions(+), 174 deletions(-)

diff --git a/libm/src/math/acos.rs b/libm/src/math/acos.rs
index 23b13251e..89b2e7c5f 100644
--- a/libm/src/math/acos.rs
+++ b/libm/src/math/acos.rs
@@ -59,7 +59,7 @@ fn r(z: f64) -> f64 {
 /// Computes the inverse cosine (arc cosine) of the input value.
 /// Arguments must be in the range -1 to 1.
 /// Returns values in radians, in the range of 0 to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acos(x: f64) -> f64 {
     let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120
     let z: f64;
diff --git a/libm/src/math/acosf.rs b/libm/src/math/acosf.rs
index dd88eea5b..d263b3f2c 100644
--- a/libm/src/math/acosf.rs
+++ b/libm/src/math/acosf.rs
@@ -33,7 +33,7 @@ fn r(z: f32) -> f32 {
 /// Computes the inverse cosine (arc cosine) of the input value.
 /// Arguments must be in the range -1 to 1.
 /// Returns values in radians, in the range of 0 to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acosf(x: f32) -> f32 {
     let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/acosh.rs b/libm/src/math/acosh.rs
index d1f5b9fa9..8737bad01 100644
--- a/libm/src/math/acosh.rs
+++ b/libm/src/math/acosh.rs
@@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42,  0xfefa3
 /// Calculates the inverse hyperbolic cosine of `x`.
 /// Is defined as `log(x + sqrt(x*x-1))`.
 /// `x` must be a number greater than or equal to 1.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acosh(x: f64) -> f64 {
     let u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/acoshf.rs b/libm/src/math/acoshf.rs
index ad3455fdd..432fa03f1 100644
--- a/libm/src/math/acoshf.rs
+++ b/libm/src/math/acoshf.rs
@@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568;
 /// Calculates the inverse hyperbolic cosine of `x`.
 /// Is defined as `log(x + sqrt(x*x-1))`.
 /// `x` must be a number greater than or equal to 1.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn acoshf(x: f32) -> f32 {
     let u = x.to_bits();
     let a = u & 0x7fffffff;
diff --git a/libm/src/math/asin.rs b/libm/src/math/asin.rs
index 12d0cd35f..9554a3eac 100644
--- a/libm/src/math/asin.rs
+++ b/libm/src/math/asin.rs
@@ -66,7 +66,7 @@ fn comp_r(z: f64) -> f64 {
 /// Computes the inverse sine (arc sine) of the argument `x`.
 /// Arguments to asin must be in the range -1 to 1.
 /// Returns values in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asin(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
diff --git a/libm/src/math/asinf.rs b/libm/src/math/asinf.rs
index ed6855567..2dfe2a6d4 100644
--- a/libm/src/math/asinf.rs
+++ b/libm/src/math/asinf.rs
@@ -35,7 +35,7 @@ fn r(z: f32) -> f32 {
 /// Computes the inverse sine (arc sine) of the argument `x`.
 /// Arguments to asin must be in the range -1 to 1.
 /// Returns values in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinf(mut x: f32) -> f32 {
     let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/asinh.rs b/libm/src/math/asinh.rs
index 75d3c3ad4..d63bc0aa9 100644
--- a/libm/src/math/asinh.rs
+++ b/libm/src/math/asinh.rs
@@ -7,7 +7,7 @@ const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42,  0xfefa3
 ///
 /// Calculates the inverse hyperbolic sine of `x`.
 /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinh(mut x: f64) -> f64 {
     let mut u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/asinhf.rs b/libm/src/math/asinhf.rs
index 27ed9dd37..3ca2d4489 100644
--- a/libm/src/math/asinhf.rs
+++ b/libm/src/math/asinhf.rs
@@ -7,7 +7,7 @@ const LN2: f32 = 0.693147180559945309417232121458176568;
 ///
 /// Calculates the inverse hyperbolic sine of `x`.
 /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn asinhf(mut x: f32) -> f32 {
     let u = x.to_bits();
     let i = u & 0x7fffffff;
diff --git a/libm/src/math/atan.rs b/libm/src/math/atan.rs
index 4ca5cc91a..0590ba87c 100644
--- a/libm/src/math/atan.rs
+++ b/libm/src/math/atan.rs
@@ -65,7 +65,7 @@ const AT: [f64; 11] = [
 ///
 /// Computes the inverse tangent (arc tangent) of the input value.
 /// Returns a value in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan(x: f64) -> f64 {
     let mut x = x;
     let mut ix = (x.to_bits() >> 32) as u32;
diff --git a/libm/src/math/atan2.rs b/libm/src/math/atan2.rs
index c668731cf..51456e409 100644
--- a/libm/src/math/atan2.rs
+++ b/libm/src/math/atan2.rs
@@ -47,7 +47,7 @@ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
 /// Computes the inverse tangent (arc tangent) of `y/x`.
 /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0).
 /// Returns a value in radians, in the range of -pi to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan2(y: f64, x: f64) -> f64 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/atan2f.rs b/libm/src/math/atan2f.rs
index 95b466fff..0f46c9f39 100644
--- a/libm/src/math/atan2f.rs
+++ b/libm/src/math/atan2f.rs
@@ -23,7 +23,7 @@ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */
 /// Computes the inverse tangent (arc tangent) of `y/x`.
 /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0).
 /// Returns a value in radians, in the range of -pi to pi.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atan2f(y: f32, x: f32) -> f32 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/atanf.rs b/libm/src/math/atanf.rs
index da8daa41a..58568d9a8 100644
--- a/libm/src/math/atanf.rs
+++ b/libm/src/math/atanf.rs
@@ -41,7 +41,7 @@ const A_T: [f32; 5] = [
 ///
 /// Computes the inverse tangent (arc tangent) of the input value.
 /// Returns a value in radians, in the range of -pi/2 to pi/2.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanf(mut x: f32) -> f32 {
     let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
 
diff --git a/libm/src/math/atanh.rs b/libm/src/math/atanh.rs
index 9dc826f56..883ff150f 100644
--- a/libm/src/math/atanh.rs
+++ b/libm/src/math/atanh.rs
@@ -5,7 +5,7 @@ use super::log1p;
 ///
 /// Calculates the inverse hyperbolic tangent of `x`.
 /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanh(x: f64) -> f64 {
     let u = x.to_bits();
     let e = ((u >> 52) as usize) & 0x7ff;
diff --git a/libm/src/math/atanhf.rs b/libm/src/math/atanhf.rs
index 80ccec1f6..e4e356d18 100644
--- a/libm/src/math/atanhf.rs
+++ b/libm/src/math/atanhf.rs
@@ -5,7 +5,7 @@ use super::log1pf;
 ///
 /// Calculates the inverse hyperbolic tangent of `x`.
 /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn atanhf(mut x: f32) -> f32 {
     let mut u = x.to_bits();
     let sign = (u >> 31) != 0;
diff --git a/libm/src/math/cbrt.rs b/libm/src/math/cbrt.rs
index cf56f7a97..e905e15f1 100644
--- a/libm/src/math/cbrt.rs
+++ b/libm/src/math/cbrt.rs
@@ -8,7 +8,7 @@ use super::Float;
 use super::support::{FpResult, Round, cold_path};
 
 /// Compute the cube root of the argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cbrt(x: f64) -> f64 {
     cbrt_round(x, Round::Nearest).val
 }
diff --git a/libm/src/math/cbrtf.rs b/libm/src/math/cbrtf.rs
index 9d70305c6..9d6958483 100644
--- a/libm/src/math/cbrtf.rs
+++ b/libm/src/math/cbrtf.rs
@@ -25,7 +25,7 @@ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
 /// Cube root (f32)
 ///
 /// Computes the cube root of the argument.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cbrtf(x: f32) -> f32 {
     let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
 
diff --git a/libm/src/math/ceil.rs b/libm/src/math/ceil.rs
index 4e1035457..2cac49f29 100644
--- a/libm/src/math/ceil.rs
+++ b/libm/src/math/ceil.rs
@@ -2,7 +2,7 @@
 ///
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf16(x: f16) -> f16 {
     super::generic::ceil(x)
 }
@@ -10,7 +10,7 @@ pub fn ceilf16(x: f16) -> f16 {
 /// Ceil (f32)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf(x: f32) -> f32 {
     select_implementation! {
         name: ceilf,
@@ -24,7 +24,7 @@ pub fn ceilf(x: f32) -> f32 {
 /// Ceil (f64)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
@@ -40,7 +40,7 @@ pub fn ceil(x: f64) -> f64 {
 ///
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf128(x: f128) -> f128 {
     super::generic::ceil(x)
 }
diff --git a/libm/src/math/copysign.rs b/libm/src/math/copysign.rs
index d093d6107..591a87a94 100644
--- a/libm/src/math/copysign.rs
+++ b/libm/src/math/copysign.rs
@@ -3,7 +3,7 @@
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf16(x: f16, y: f16) -> f16 {
     super::generic::copysign(x, y)
 }
@@ -12,7 +12,7 @@ pub fn copysignf16(x: f16, y: f16) -> f16 {
 ///
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf(x: f32, y: f32) -> f32 {
     super::generic::copysign(x, y)
 }
@@ -21,7 +21,7 @@ pub fn copysignf(x: f32, y: f32) -> f32 {
 ///
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysign(x: f64, y: f64) -> f64 {
     super::generic::copysign(x, y)
 }
@@ -31,7 +31,7 @@ pub fn copysign(x: f64, y: f64) -> f64 {
 /// Constructs a number with the magnitude (absolute value) of its
 /// first argument, `x`, and the sign of its second argument, `y`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn copysignf128(x: f128, y: f128) -> f128 {
     super::generic::copysign(x, y)
 }
diff --git a/libm/src/math/cos.rs b/libm/src/math/cos.rs
index de99cd4c5..b2f786323 100644
--- a/libm/src/math/cos.rs
+++ b/libm/src/math/cos.rs
@@ -45,7 +45,7 @@ use super::{k_cos, k_sin, rem_pio2};
 /// The cosine of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cos(x: f64) -> f64 {
     let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff;
 
diff --git a/libm/src/math/cosf.rs b/libm/src/math/cosf.rs
index 27c2fc3b9..bf5cb9196 100644
--- a/libm/src/math/cosf.rs
+++ b/libm/src/math/cosf.rs
@@ -27,7 +27,7 @@ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The cosine of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cosf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/cosh.rs b/libm/src/math/cosh.rs
index d2e43fd6c..01081cfc7 100644
--- a/libm/src/math/cosh.rs
+++ b/libm/src/math/cosh.rs
@@ -5,7 +5,7 @@ use super::{exp, expm1, k_expo2};
 /// Computes the hyperbolic cosine of the argument x.
 /// Is defined as `(exp(x) + exp(-x))/2`
 /// Angles are specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn cosh(mut x: f64) -> f64 {
     /* |x| */
     let mut ix = x.to_bits();
diff --git a/libm/src/math/coshf.rs b/libm/src/math/coshf.rs
index 567a24410..dc039a311 100644
--- a/libm/src/math/coshf.rs
+++ b/libm/src/math/coshf.rs
@@ -5,7 +5,7 @@ use super::{expf, expm1f, k_expo2f};
 /// Computes the hyperbolic cosine of the argument x.
 /// Is defined as `(exp(x) + exp(-x))/2`
 /// Angles are specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn coshf(mut x: f32) -> f32 {
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/erf.rs b/libm/src/math/erf.rs
index 5d82228a0..6c78440af 100644
--- a/libm/src/math/erf.rs
+++ b/libm/src/math/erf.rs
@@ -219,7 +219,7 @@ fn erfc2(ix: u32, mut x: f64) -> f64 {
 /// Calculates an approximation to the “error function”, which estimates
 /// the probability that an observation will fall within x standard
 /// deviations of the mean (assuming a normal distribution).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn erf(x: f64) -> f64 {
     let r: f64;
     let s: f64;
diff --git a/libm/src/math/erff.rs b/libm/src/math/erff.rs
index fe15f0108..2a7680275 100644
--- a/libm/src/math/erff.rs
+++ b/libm/src/math/erff.rs
@@ -130,7 +130,7 @@ fn erfc2(mut ix: u32, mut x: f32) -> f32 {
 /// Calculates an approximation to the “error function”, which estimates
 /// the probability that an observation will fall within x standard
 /// deviations of the mean (assuming a normal distribution).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn erff(x: f32) -> f32 {
     let r: f32;
     let s: f32;
diff --git a/libm/src/math/exp.rs b/libm/src/math/exp.rs
index 782042b62..78ce5dd13 100644
--- a/libm/src/math/exp.rs
+++ b/libm/src/math/exp.rs
@@ -81,7 +81,7 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 ///
 /// Calculate the exponential of `x`, that is, *e* raised to the power `x`
 /// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp(mut x: f64) -> f64 {
     let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
     let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149
diff --git a/libm/src/math/exp10.rs b/libm/src/math/exp10.rs
index 7c33c92b6..1f49f5e96 100644
--- a/libm/src/math/exp10.rs
+++ b/libm/src/math/exp10.rs
@@ -7,7 +7,7 @@ const P10: &[f64] = &[
 ];
 
 /// Calculates 10 raised to the power of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp10(x: f64) -> f64 {
     let (mut y, n) = modf(x);
     let u: u64 = n.to_bits();
diff --git a/libm/src/math/exp10f.rs b/libm/src/math/exp10f.rs
index 303045b33..22a264211 100644
--- a/libm/src/math/exp10f.rs
+++ b/libm/src/math/exp10f.rs
@@ -7,7 +7,7 @@ const P10: &[f32] = &[
 ];
 
 /// Calculates 10 raised to the power of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp10f(x: f32) -> f32 {
     let (mut y, n) = modff(x);
     let u = n.to_bits();
diff --git a/libm/src/math/exp2.rs b/libm/src/math/exp2.rs
index 6e98d066c..6e4cbc29d 100644
--- a/libm/src/math/exp2.rs
+++ b/libm/src/math/exp2.rs
@@ -322,7 +322,7 @@ static TBL: [u64; TBLSIZE * 2] = [
 /// Exponential, base 2 (f64)
 ///
 /// Calculate `2^x`, that is, 2 raised to the power `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp2(mut x: f64) -> f64 {
     let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64;
     let p1 = f64::from_bits(0x3fe62e42fefa39ef);
diff --git a/libm/src/math/exp2f.rs b/libm/src/math/exp2f.rs
index f452b6a20..733d2f1a8 100644
--- a/libm/src/math/exp2f.rs
+++ b/libm/src/math/exp2f.rs
@@ -73,7 +73,7 @@ static EXP2FT: [u64; TBLSIZE] = [
 /// Exponential, base 2 (f32)
 ///
 /// Calculate `2^x`, that is, 2 raised to the power `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn exp2f(mut x: f32) -> f32 {
     let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
     let p1 = f32::from_bits(0x3f317218);
diff --git a/libm/src/math/expf.rs b/libm/src/math/expf.rs
index 8dc067ab0..dbbfdbba9 100644
--- a/libm/src/math/expf.rs
+++ b/libm/src/math/expf.rs
@@ -30,7 +30,7 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
 ///
 /// Calculate the exponential of `x`, that is, *e* raised to the power `x`
 /// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expf(mut x: f32) -> f32 {
     let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
     let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126  /*original 0x1p-149f    ??????????? */
diff --git a/libm/src/math/expm1.rs b/libm/src/math/expm1.rs
index f25153f32..3714bf3af 100644
--- a/libm/src/math/expm1.rs
+++ b/libm/src/math/expm1.rs
@@ -30,7 +30,7 @@ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
 /// system of logarithms, approximately 2.71828).
 /// The result is accurate even for small values of `x`,
 /// where using `exp(x)-1` would lose many significant digits.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expm1(mut x: f64) -> f64 {
     let hi: f64;
     let lo: f64;
diff --git a/libm/src/math/expm1f.rs b/libm/src/math/expm1f.rs
index 63dc86e37..f77515a4b 100644
--- a/libm/src/math/expm1f.rs
+++ b/libm/src/math/expm1f.rs
@@ -32,7 +32,7 @@ const Q2: f32 = 1.5807170421e-3; /*  0xcf3010.0p-33 */
 /// system of logarithms, approximately 2.71828).
 /// The result is accurate even for small values of `x`,
 /// where using `exp(x)-1` would lose many significant digits.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn expm1f(mut x: f32) -> f32 {
     let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
 
diff --git a/libm/src/math/expo2.rs b/libm/src/math/expo2.rs
index 82e9b360a..ce90858ec 100644
--- a/libm/src/math/expo2.rs
+++ b/libm/src/math/expo2.rs
@@ -1,7 +1,7 @@
 use super::{combine_words, exp};
 
 /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn expo2(x: f64) -> f64 {
     /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */
     const K: i32 = 2043;
diff --git a/libm/src/math/fabs.rs b/libm/src/math/fabs.rs
index 0050a309f..7344e21a1 100644
--- a/libm/src/math/fabs.rs
+++ b/libm/src/math/fabs.rs
@@ -3,7 +3,7 @@
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf16(x: f16) -> f16 {
     super::generic::fabs(x)
 }
@@ -12,7 +12,7 @@ pub fn fabsf16(x: f16) -> f16 {
 ///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf(x: f32) -> f32 {
     select_implementation! {
         name: fabsf,
@@ -27,7 +27,7 @@ pub fn fabsf(x: f32) -> f32 {
 ///
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabs(x: f64) -> f64 {
     select_implementation! {
         name: fabs,
@@ -43,7 +43,7 @@ pub fn fabs(x: f64) -> f64 {
 /// Calculates the absolute value (magnitude) of the argument `x`,
 /// by direct manipulation of the bit representation of `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fabsf128(x: f128) -> f128 {
     super::generic::fabs(x)
 }
diff --git a/libm/src/math/fdim.rs b/libm/src/math/fdim.rs
index 082c5478b..dac409e86 100644
--- a/libm/src/math/fdim.rs
+++ b/libm/src/math/fdim.rs
@@ -7,7 +7,7 @@
 ///
 /// A range error may occur.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf16(x: f16, y: f16) -> f16 {
     super::generic::fdim(x, y)
 }
@@ -20,7 +20,7 @@ pub fn fdimf16(x: f16, y: f16) -> f16 {
 /// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf(x: f32, y: f32) -> f32 {
     super::generic::fdim(x, y)
 }
@@ -33,7 +33,7 @@ pub fn fdimf(x: f32, y: f32) -> f32 {
 /// * NAN   if either argument is NAN.
 ///
 /// A range error may occur.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdim(x: f64, y: f64) -> f64 {
     super::generic::fdim(x, y)
 }
@@ -47,7 +47,7 @@ pub fn fdim(x: f64, y: f64) -> f64 {
 ///
 /// A range error may occur.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fdimf128(x: f128, y: f128) -> f128 {
     super::generic::fdim(x, y)
 }
diff --git a/libm/src/math/floor.rs b/libm/src/math/floor.rs
index 3c5eab101..7241c427f 100644
--- a/libm/src/math/floor.rs
+++ b/libm/src/math/floor.rs
@@ -2,7 +2,7 @@
 ///
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf16(x: f16) -> f16 {
     return super::generic::floor(x);
 }
@@ -10,7 +10,7 @@ pub fn floorf16(x: f16) -> f16 {
 /// Floor (f64)
 ///
 /// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
@@ -25,7 +25,7 @@ pub fn floor(x: f64) -> f64 {
 /// Floor (f32)
 ///
 /// Finds the nearest integer less than or equal to `x`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf(x: f32) -> f32 {
     select_implementation! {
         name: floorf,
@@ -40,7 +40,7 @@ pub fn floorf(x: f32) -> f32 {
 ///
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf128(x: f128) -> f128 {
     return super::generic::floor(x);
 }
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
index 5bf473cfe..70e6de768 100644
--- a/libm/src/math/fma.rs
+++ b/libm/src/math/fma.rs
@@ -7,7 +7,7 @@ use crate::support::Round;
 // Placeholder so we can have `fmaf16` in the `Float` trait.
 #[allow(unused)]
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
     unimplemented!()
 }
@@ -15,7 +15,7 @@ pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
 /// Floating multiply add (f32)
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
     select_implementation! {
         name: fmaf,
@@ -32,7 +32,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 /// Fused multiply add (f64)
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fma(x: f64, y: f64, z: f64) -> f64 {
     select_implementation! {
         name: fma,
@@ -50,7 +50,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
 ///
 /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
     generic::fma_round(x, y, z, Round::Nearest).val
 }
diff --git a/libm/src/math/fmin_fmax.rs b/libm/src/math/fmin_fmax.rs
index 481301994..c4c1b0435 100644
--- a/libm/src/math/fmin_fmax.rs
+++ b/libm/src/math/fmin_fmax.rs
@@ -3,7 +3,7 @@
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf16(x: f16, y: f16) -> f16 {
     super::generic::fmin(x, y)
 }
@@ -12,7 +12,7 @@ pub fn fminf16(x: f16, y: f16) -> f16 {
 ///
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf(x: f32, y: f32) -> f32 {
     super::generic::fmin(x, y)
 }
@@ -21,7 +21,7 @@ pub fn fminf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmin(x: f64, y: f64) -> f64 {
     super::generic::fmin(x, y)
 }
@@ -31,7 +31,7 @@ pub fn fmin(x: f64, y: f64) -> f64 {
 /// This coincides with IEEE 754-2011 `minNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminf128(x: f128, y: f128) -> f128 {
     super::generic::fmin(x, y)
 }
@@ -41,7 +41,7 @@ pub fn fminf128(x: f128, y: f128) -> f128 {
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf16(x: f16, y: f16) -> f16 {
     super::generic::fmax(x, y)
 }
@@ -50,7 +50,7 @@ pub fn fmaxf16(x: f16, y: f16) -> f16 {
 ///
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf(x: f32, y: f32) -> f32 {
     super::generic::fmax(x, y)
 }
@@ -59,7 +59,7 @@ pub fn fmaxf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmax(x: f64, y: f64) -> f64 {
     super::generic::fmax(x, y)
 }
@@ -69,7 +69,7 @@ pub fn fmax(x: f64, y: f64) -> f64 {
 /// This coincides with IEEE 754-2011 `maxNum`. The result disregards signed zero (meaning if
 /// the inputs are -0.0 and +0.0, either may be returned).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaxf128(x: f128, y: f128) -> f128 {
     super::generic::fmax(x, y)
 }
diff --git a/libm/src/math/fminimum_fmaximum.rs b/libm/src/math/fminimum_fmaximum.rs
index 8f1308670..a3c9c9c39 100644
--- a/libm/src/math/fminimum_fmaximum.rs
+++ b/libm/src/math/fminimum_fmaximum.rs
@@ -2,7 +2,7 @@
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf16(x: f16, y: f16) -> f16 {
     super::generic::fminimum(x, y)
 }
@@ -10,7 +10,7 @@ pub fn fminimumf16(x: f16, y: f16) -> f16 {
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum(x: f64, y: f64) -> f64 {
     super::generic::fminimum(x, y)
 }
@@ -18,7 +18,7 @@ pub fn fminimum(x: f64, y: f64) -> f64 {
 /// Return the lesser of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf(x: f32, y: f32) -> f32 {
     super::generic::fminimum(x, y)
 }
@@ -27,7 +27,7 @@ pub fn fminimumf(x: f32, y: f32) -> f32 {
 ///
 /// This coincides with IEEE 754-2019 `minimum`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimumf128(x: f128, y: f128) -> f128 {
     super::generic::fminimum(x, y)
 }
@@ -36,7 +36,7 @@ pub fn fminimumf128(x: f128, y: f128) -> f128 {
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf16(x: f16, y: f16) -> f16 {
     super::generic::fmaximum(x, y)
 }
@@ -44,7 +44,7 @@ pub fn fmaximumf16(x: f16, y: f16) -> f16 {
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf(x: f32, y: f32) -> f32 {
     super::generic::fmaximum(x, y)
 }
@@ -52,7 +52,7 @@ pub fn fmaximumf(x: f32, y: f32) -> f32 {
 /// Return the greater of two arguments or, if either argument is NaN, the other argument.
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum(x: f64, y: f64) -> f64 {
     super::generic::fmaximum(x, y)
 }
@@ -61,7 +61,7 @@ pub fn fmaximum(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `maximum`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximumf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum(x, y)
 }
diff --git a/libm/src/math/fminimum_fmaximum_num.rs b/libm/src/math/fminimum_fmaximum_num.rs
index fadf93418..612cefe75 100644
--- a/libm/src/math/fminimum_fmaximum_num.rs
+++ b/libm/src/math/fminimum_fmaximum_num.rs
@@ -2,7 +2,7 @@
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
     super::generic::fminimum_num(x, y)
 }
@@ -10,7 +10,7 @@ pub fn fminimum_numf16(x: f16, y: f16) -> f16 {
 /// Return the lesser of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf(x: f32, y: f32) -> f32 {
     super::generic::fminimum_num(x, y)
 }
@@ -18,7 +18,7 @@ pub fn fminimum_numf(x: f32, y: f32) -> f32 {
 /// Return the lesser of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_num(x: f64, y: f64) -> f64 {
     super::generic::fminimum_num(x, y)
 }
@@ -27,7 +27,7 @@ pub fn fminimum_num(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `minimumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
     super::generic::fminimum_num(x, y)
 }
@@ -36,7 +36,7 @@ pub fn fminimum_numf128(x: f128, y: f128) -> f128 {
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
     super::generic::fmaximum_num(x, y)
 }
@@ -44,7 +44,7 @@ pub fn fmaximum_numf16(x: f16, y: f16) -> f16 {
 /// Return the greater of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
     super::generic::fmaximum_num(x, y)
 }
@@ -52,7 +52,7 @@ pub fn fmaximum_numf(x: f32, y: f32) -> f32 {
 /// Return the greater of two arguments or, if either argument is NaN, NaN.
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_num(x: f64, y: f64) -> f64 {
     super::generic::fmaximum_num(x, y)
 }
@@ -61,7 +61,7 @@ pub fn fmaximum_num(x: f64, y: f64) -> f64 {
 ///
 /// This coincides with IEEE 754-2019 `maximumNumber`. The result orders -0.0 < 0.0.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
     super::generic::fmaximum_num(x, y)
 }
diff --git a/libm/src/math/fmod.rs b/libm/src/math/fmod.rs
index c4752b925..6ae1be560 100644
--- a/libm/src/math/fmod.rs
+++ b/libm/src/math/fmod.rs
@@ -1,25 +1,25 @@
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf16(x: f16, y: f16) -> f16 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf(x: f32, y: f32) -> f32 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmod(x: f64, y: f64) -> f64 {
     super::generic::fmod(x, y)
 }
 
 /// Calculate the remainder of `x / y`, the precise result of `x - trunc(x / y) * y`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn fmodf128(x: f128, y: f128) -> f128 {
     super::generic::fmod(x, y)
 }
diff --git a/libm/src/math/frexp.rs b/libm/src/math/frexp.rs
index de7a64fda..932111eeb 100644
--- a/libm/src/math/frexp.rs
+++ b/libm/src/math/frexp.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn frexp(x: f64) -> (f64, i32) {
     let mut y = x.to_bits();
     let ee = ((y >> 52) & 0x7ff) as i32;
diff --git a/libm/src/math/frexpf.rs b/libm/src/math/frexpf.rs
index 0ec91c2d3..904bf14f7 100644
--- a/libm/src/math/frexpf.rs
+++ b/libm/src/math/frexpf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn frexpf(x: f32) -> (f32, i32) {
     let mut y = x.to_bits();
     let ee: i32 = ((y >> 23) & 0xff) as i32;
diff --git a/libm/src/math/hypot.rs b/libm/src/math/hypot.rs
index da458ea1d..b92ee18ca 100644
--- a/libm/src/math/hypot.rs
+++ b/libm/src/math/hypot.rs
@@ -17,7 +17,7 @@ fn sq(x: f64) -> (f64, f64) {
     (hi, lo)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn hypot(mut x: f64, mut y: f64) -> f64 {
     let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
     let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
diff --git a/libm/src/math/hypotf.rs b/libm/src/math/hypotf.rs
index 576eebb33..e7635ffc9 100644
--- a/libm/src/math/hypotf.rs
+++ b/libm/src/math/hypotf.rs
@@ -2,7 +2,7 @@ use core::f32;
 
 use super::sqrtf;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn hypotf(mut x: f32, mut y: f32) -> f32 {
     let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90
     let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90
diff --git a/libm/src/math/ilogb.rs b/libm/src/math/ilogb.rs
index 5b41f7b1d..ef774f6ad 100644
--- a/libm/src/math/ilogb.rs
+++ b/libm/src/math/ilogb.rs
@@ -1,7 +1,7 @@
 const FP_ILOGBNAN: i32 = -1 - 0x7fffffff;
 const FP_ILOGB0: i32 = FP_ILOGBNAN;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ilogb(x: f64) -> i32 {
     let mut i: u64 = x.to_bits();
     let e = ((i >> 52) & 0x7ff) as i32;
diff --git a/libm/src/math/ilogbf.rs b/libm/src/math/ilogbf.rs
index 3585d6d36..5b0cb46ec 100644
--- a/libm/src/math/ilogbf.rs
+++ b/libm/src/math/ilogbf.rs
@@ -1,7 +1,7 @@
 const FP_ILOGBNAN: i32 = -1 - 0x7fffffff;
 const FP_ILOGB0: i32 = FP_ILOGBNAN;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ilogbf(x: f32) -> i32 {
     let mut i = x.to_bits();
     let e = ((i >> 23) & 0xff) as i32;
diff --git a/libm/src/math/j0.rs b/libm/src/math/j0.rs
index 99d656f0d..7b0800477 100644
--- a/libm/src/math/j0.rs
+++ b/libm/src/math/j0.rs
@@ -110,7 +110,7 @@ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */
 const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j0(mut x: f64) -> f64 {
     let z: f64;
     let r: f64;
@@ -165,7 +165,7 @@ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */
 const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y0(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/libm/src/math/j0f.rs b/libm/src/math/j0f.rs
index 25e5b325c..1c6a7c344 100644
--- a/libm/src/math/j0f.rs
+++ b/libm/src/math/j0f.rs
@@ -63,7 +63,7 @@ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */
 const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j0f(mut x: f32) -> f32 {
     let z: f32;
     let r: f32;
@@ -110,7 +110,7 @@ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */
 const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */
 
 /// Zeroth order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y0f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/libm/src/math/j1.rs b/libm/src/math/j1.rs
index 9b604d9e4..7d304ba10 100644
--- a/libm/src/math/j1.rs
+++ b/libm/src/math/j1.rs
@@ -114,7 +114,7 @@ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */
 const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j1(x: f64) -> f64 {
     let mut z: f64;
     let r: f64;
@@ -161,7 +161,7 @@ const V0: [f64; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y1(x: f64) -> f64 {
     let z: f64;
     let u: f64;
diff --git a/libm/src/math/j1f.rs b/libm/src/math/j1f.rs
index da5413ac2..cd829c1aa 100644
--- a/libm/src/math/j1f.rs
+++ b/libm/src/math/j1f.rs
@@ -64,7 +64,7 @@ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */
 const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn j1f(x: f32) -> f32 {
     let mut z: f32;
     let r: f32;
@@ -110,7 +110,7 @@ const V0: [f32; 5] = [
 ];
 
 /// First order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn y1f(x: f32) -> f32 {
     let z: f32;
     let u: f32;
diff --git a/libm/src/math/jn.rs b/libm/src/math/jn.rs
index 31f8d9c53..b87aeaf1c 100644
--- a/libm/src/math/jn.rs
+++ b/libm/src/math/jn.rs
@@ -39,7 +39,7 @@ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0,
 const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn jn(n: i32, mut x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
@@ -249,7 +249,7 @@ pub fn jn(n: i32, mut x: f64) -> f64 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn yn(n: i32, x: f64) -> f64 {
     let mut ix: u32;
     let lx: u32;
diff --git a/libm/src/math/jnf.rs b/libm/src/math/jnf.rs
index 52cf7d8a8..34fdc5112 100644
--- a/libm/src/math/jnf.rs
+++ b/libm/src/math/jnf.rs
@@ -16,7 +16,7 @@
 use super::{fabsf, j0f, j1f, logf, y0f, y1f};
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the first kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn jnf(n: i32, mut x: f32) -> f32 {
     let mut ix: u32;
     let mut nm1: i32;
@@ -192,7 +192,7 @@ pub fn jnf(n: i32, mut x: f32) -> f32 {
 }
 
 /// Integer order of the [Bessel function](https://en.wikipedia.org/wiki/Bessel_function) of the second kind (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ynf(n: i32, x: f32) -> f32 {
     let mut ix: u32;
     let mut ib: u32;
diff --git a/libm/src/math/k_cos.rs b/libm/src/math/k_cos.rs
index 49b2fc64d..1a2ebabe3 100644
--- a/libm/src/math/k_cos.rs
+++ b/libm/src/math/k_cos.rs
@@ -51,7 +51,7 @@ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 //         expression for cos().  Retention happens in all cases tested
 //         under FreeBSD, so don't pessimize things by forcibly clipping
 //         any extra precision in w.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_cos(x: f64, y: f64) -> f64 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_cosf.rs b/libm/src/math/k_cosf.rs
index e99f2348c..68f568c24 100644
--- a/libm/src/math/k_cosf.rs
+++ b/libm/src/math/k_cosf.rs
@@ -20,7 +20,7 @@ const C1: f64 = 0.0416666233237390631894; /*  0x155553e1053a42.0p-57 */
 const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */
 const C3: f64 = 0.0000243904487962774090654; /*  0x199342e0ee5069.0p-68 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_cosf(x: f64) -> f32 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_expo2.rs b/libm/src/math/k_expo2.rs
index 7345075f3..7b63952d2 100644
--- a/libm/src/math/k_expo2.rs
+++ b/libm/src/math/k_expo2.rs
@@ -4,7 +4,7 @@ use super::exp;
 const K: i32 = 2043;
 
 /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_expo2(x: f64) -> f64 {
     let k_ln2 = f64::from_bits(0x40962066151add8b);
     /* note that k is odd and scale*scale overflows */
diff --git a/libm/src/math/k_expo2f.rs b/libm/src/math/k_expo2f.rs
index fbd7b27d5..02213cec4 100644
--- a/libm/src/math/k_expo2f.rs
+++ b/libm/src/math/k_expo2f.rs
@@ -4,7 +4,7 @@ use super::expf;
 const K: i32 = 235;
 
 /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_expo2f(x: f32) -> f32 {
     let k_ln2 = f32::from_bits(0x4322e3bc);
     /* note that k is odd and scale*scale overflows */
diff --git a/libm/src/math/k_sin.rs b/libm/src/math/k_sin.rs
index 9dd96c944..2f8542945 100644
--- a/libm/src/math/k_sin.rs
+++ b/libm/src/math/k_sin.rs
@@ -43,7 +43,7 @@ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 //              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
 //         then                   3    2
 //              sin(x) = x + (S1*x + (x *(r-y/2)+y))
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_sinf.rs b/libm/src/math/k_sinf.rs
index 88d10caba..297d88bbb 100644
--- a/libm/src/math/k_sinf.rs
+++ b/libm/src/math/k_sinf.rs
@@ -20,7 +20,7 @@ const S2: f64 = 0.0083333293858894631756; /*  0x111110896efbb2.0p-59 */
 const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */
 const S4: f64 = 0.0000027183114939898219064; /*  0x16cd878c3b46a7.0p-71 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_sinf(x: f64) -> f32 {
     let z = x * x;
     let w = z * z;
diff --git a/libm/src/math/k_tan.rs b/libm/src/math/k_tan.rs
index d177010bb..ac48d661f 100644
--- a/libm/src/math/k_tan.rs
+++ b/libm/src/math/k_tan.rs
@@ -58,7 +58,7 @@ static T: [f64; 13] = [
 const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */
 const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 {
     let hx = (f64::to_bits(x) >> 32) as u32;
     let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
diff --git a/libm/src/math/k_tanf.rs b/libm/src/math/k_tanf.rs
index af8db539d..79382f57b 100644
--- a/libm/src/math/k_tanf.rs
+++ b/libm/src/math/k_tanf.rs
@@ -19,7 +19,7 @@ const T: [f64; 6] = [
     0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */
 ];
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 {
     let z = x * x;
     /*
diff --git a/libm/src/math/ldexp.rs b/libm/src/math/ldexp.rs
index 24899ba30..b32b8d524 100644
--- a/libm/src/math/ldexp.rs
+++ b/libm/src/math/ldexp.rs
@@ -1,21 +1,21 @@
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf16(x: f16, n: i32) -> f16 {
     super::scalbnf16(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf(x: f32, n: i32) -> f32 {
     super::scalbnf(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexp(x: f64, n: i32) -> f64 {
     super::scalbn(x, n)
 }
 
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ldexpf128(x: f128, n: i32) -> f128 {
     super::scalbnf128(x, n)
 }
diff --git a/libm/src/math/lgamma.rs b/libm/src/math/lgamma.rs
index 8312dc186..da7ce5c98 100644
--- a/libm/src/math/lgamma.rs
+++ b/libm/src/math/lgamma.rs
@@ -2,7 +2,7 @@ use super::lgamma_r;
 
 /// The natural logarithm of the
 /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgamma(x: f64) -> f64 {
     lgamma_r(x).0
 }
diff --git a/libm/src/math/lgamma_r.rs b/libm/src/math/lgamma_r.rs
index 6becaad2c..38eb270f6 100644
--- a/libm/src/math/lgamma_r.rs
+++ b/libm/src/math/lgamma_r.rs
@@ -165,7 +165,7 @@ fn sin_pi(mut x: f64) -> f64 {
     }
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgamma_r(mut x: f64) -> (f64, i32) {
     let u: u64 = x.to_bits();
     let mut t: f64;
diff --git a/libm/src/math/lgammaf.rs b/libm/src/math/lgammaf.rs
index d37512397..920acfed2 100644
--- a/libm/src/math/lgammaf.rs
+++ b/libm/src/math/lgammaf.rs
@@ -2,7 +2,7 @@ use super::lgammaf_r;
 
 /// The natural logarithm of the
 /// [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgammaf(x: f32) -> f32 {
     lgammaf_r(x).0
 }
diff --git a/libm/src/math/lgammaf_r.rs b/libm/src/math/lgammaf_r.rs
index 10cecee54..a0b6a678a 100644
--- a/libm/src/math/lgammaf_r.rs
+++ b/libm/src/math/lgammaf_r.rs
@@ -100,7 +100,7 @@ fn sin_pi(mut x: f32) -> f32 {
     }
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn lgammaf_r(mut x: f32) -> (f32, i32) {
     let u = x.to_bits();
     let mut t: f32;
diff --git a/libm/src/math/log.rs b/libm/src/math/log.rs
index f2dc47ec5..9499c56d8 100644
--- a/libm/src/math/log.rs
+++ b/libm/src/math/log.rs
@@ -71,7 +71,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The natural logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log10.rs b/libm/src/math/log10.rs
index 8c9d68c49..29f25d944 100644
--- a/libm/src/math/log10.rs
+++ b/libm/src/math/log10.rs
@@ -32,7 +32,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The base 10 logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log10(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log10f.rs b/libm/src/math/log10f.rs
index 18bf8fcc8..f89584bf9 100644
--- a/libm/src/math/log10f.rs
+++ b/libm/src/math/log10f.rs
@@ -26,7 +26,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The base 10 logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log10f(mut x: f32) -> f32 {
     let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/log1p.rs b/libm/src/math/log1p.rs
index 65142c0d6..c991cce60 100644
--- a/libm/src/math/log1p.rs
+++ b/libm/src/math/log1p.rs
@@ -66,7 +66,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The natural logarithm of 1+`x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log1p(x: f64) -> f64 {
     let mut ui: u64 = x.to_bits();
     let hfsq: f64;
diff --git a/libm/src/math/log1pf.rs b/libm/src/math/log1pf.rs
index 23978e61c..89a92fac9 100644
--- a/libm/src/math/log1pf.rs
+++ b/libm/src/math/log1pf.rs
@@ -21,7 +21,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The natural logarithm of 1+`x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log1pf(x: f32) -> f32 {
     let mut ui: u32 = x.to_bits();
     let hfsq: f32;
diff --git a/libm/src/math/log2.rs b/libm/src/math/log2.rs
index 701f63c25..9b750c9a2 100644
--- a/libm/src/math/log2.rs
+++ b/libm/src/math/log2.rs
@@ -30,7 +30,7 @@ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
 const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
 
 /// The base 2 logarithm of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log2(mut x: f64) -> f64 {
     let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
 
diff --git a/libm/src/math/log2f.rs b/libm/src/math/log2f.rs
index 5ba2427d1..0e5177d7a 100644
--- a/libm/src/math/log2f.rs
+++ b/libm/src/math/log2f.rs
@@ -24,7 +24,7 @@ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */
 
 /// The base 2 logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn log2f(mut x: f32) -> f32 {
     let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/logf.rs b/libm/src/math/logf.rs
index 68d194302..cd7a7b0ba 100644
--- a/libm/src/math/logf.rs
+++ b/libm/src/math/logf.rs
@@ -22,7 +22,7 @@ const LG3: f32 = 0.28498786688; /*  0x91e9ee.0p-25 */
 const LG4: f32 = 0.24279078841; /*  0xf89e26.0p-26 */
 
 /// The natural logarithm of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn logf(mut x: f32) -> f32 {
     let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25
 
diff --git a/libm/src/math/modf.rs b/libm/src/math/modf.rs
index 6541862cd..a92a83dc5 100644
--- a/libm/src/math/modf.rs
+++ b/libm/src/math/modf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn modf(x: f64) -> (f64, f64) {
     let rv2: f64;
     let mut u = x.to_bits();
diff --git a/libm/src/math/modff.rs b/libm/src/math/modff.rs
index 90c6bca7d..691f351ca 100644
--- a/libm/src/math/modff.rs
+++ b/libm/src/math/modff.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn modff(x: f32) -> (f32, f32) {
     let rv2: f32;
     let mut u: u32 = x.to_bits();
diff --git a/libm/src/math/nextafter.rs b/libm/src/math/nextafter.rs
index c991ff6f2..f4408468c 100644
--- a/libm/src/math/nextafter.rs
+++ b/libm/src/math/nextafter.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn nextafter(x: f64, y: f64) -> f64 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/nextafterf.rs b/libm/src/math/nextafterf.rs
index 8ba383356..c15eb9de2 100644
--- a/libm/src/math/nextafterf.rs
+++ b/libm/src/math/nextafterf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn nextafterf(x: f32, y: f32) -> f32 {
     if x.is_nan() || y.is_nan() {
         return x + y;
diff --git a/libm/src/math/pow.rs b/libm/src/math/pow.rs
index 94ae31cf0..914d68cfc 100644
--- a/libm/src/math/pow.rs
+++ b/libm/src/math/pow.rs
@@ -90,7 +90,7 @@ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/l
 const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/
 
 /// Returns `x` to the power of `y` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn pow(x: f64, y: f64) -> f64 {
     let t1: f64;
     let t2: f64;
diff --git a/libm/src/math/powf.rs b/libm/src/math/powf.rs
index 11c7a7cbd..17772ae87 100644
--- a/libm/src/math/powf.rs
+++ b/libm/src/math/powf.rs
@@ -46,7 +46,7 @@ const IVLN2_H: f32 = 1.4426879883e+00;
 const IVLN2_L: f32 = 7.0526075433e-06;
 
 /// Returns `x` to the power of `y` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn powf(x: f32, y: f32) -> f32 {
     let mut z: f32;
     let mut ax: f32;
diff --git a/libm/src/math/rem_pio2.rs b/libm/src/math/rem_pio2.rs
index 648dca170..61b103027 100644
--- a/libm/src/math/rem_pio2.rs
+++ b/libm/src/math/rem_pio2.rs
@@ -41,7 +41,7 @@ const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 // use rem_pio2_large() for large x
 //
 // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
     let x1p24 = f64::from_bits(0x4170000000000000);
 
diff --git a/libm/src/math/rem_pio2_large.rs b/libm/src/math/rem_pio2_large.rs
index 792c09fb1..f1fdf3673 100644
--- a/libm/src/math/rem_pio2_large.rs
+++ b/libm/src/math/rem_pio2_large.rs
@@ -221,7 +221,7 @@ const PIO2: [f64; 8] = [
 /// skip the part of the product that are known to be a huge integer (
 /// more accurately, = 0 mod 8 ). Thus the number of operations are
 /// independent of the exponent of the input.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 {
     // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail
     // on the callers of this function. As a workaround, avoid inlining `floor` here
diff --git a/libm/src/math/rem_pio2f.rs b/libm/src/math/rem_pio2f.rs
index 3c658fe3d..0472a1035 100644
--- a/libm/src/math/rem_pio2f.rs
+++ b/libm/src/math/rem_pio2f.rs
@@ -31,7 +31,7 @@ const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
 ///
 /// use double precision for everything except passing x
 /// use __rem_pio2_large() for large x
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) {
     let x64 = x as f64;
 
diff --git a/libm/src/math/remainder.rs b/libm/src/math/remainder.rs
index 9e966c9ed..54152df32 100644
--- a/libm/src/math/remainder.rs
+++ b/libm/src/math/remainder.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remainder(x: f64, y: f64) -> f64 {
     let (result, _) = super::remquo(x, y);
     result
diff --git a/libm/src/math/remainderf.rs b/libm/src/math/remainderf.rs
index b1407cf2a..21f629214 100644
--- a/libm/src/math/remainderf.rs
+++ b/libm/src/math/remainderf.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remainderf(x: f32, y: f32) -> f32 {
     let (result, _) = super::remquof(x, y);
     result
diff --git a/libm/src/math/remquo.rs b/libm/src/math/remquo.rs
index 4c11e8487..f13b09237 100644
--- a/libm/src/math/remquo.rs
+++ b/libm/src/math/remquo.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) {
     let ux: u64 = x.to_bits();
     let mut uy: u64 = y.to_bits();
diff --git a/libm/src/math/remquof.rs b/libm/src/math/remquof.rs
index b0e85ca66..cc7863a09 100644
--- a/libm/src/math/remquof.rs
+++ b/libm/src/math/remquof.rs
@@ -1,4 +1,4 @@
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) {
     let ux: u32 = x.to_bits();
     let mut uy: u32 = y.to_bits();
diff --git a/libm/src/math/rint.rs b/libm/src/math/rint.rs
index e1c32c943..011a7ae3d 100644
--- a/libm/src/math/rint.rs
+++ b/libm/src/math/rint.rs
@@ -2,7 +2,7 @@ use super::support::Round;
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf16(x: f16) -> f16 {
     select_implementation! {
         name: rintf16,
@@ -14,7 +14,7 @@ pub fn rintf16(x: f16) -> f16 {
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf(x: f32) -> f32 {
     select_implementation! {
         name: rintf,
@@ -29,7 +29,7 @@ pub fn rintf(x: f32) -> f32 {
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rint(x: f64) -> f64 {
     select_implementation! {
         name: rint,
@@ -45,7 +45,7 @@ pub fn rint(x: f64) -> f64 {
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf128(x: f128) -> f128 {
     super::generic::rint_round(x, Round::Nearest).val
 }
diff --git a/libm/src/math/round.rs b/libm/src/math/round.rs
index 6cd091cd7..256197e6c 100644
--- a/libm/src/math/round.rs
+++ b/libm/src/math/round.rs
@@ -1,25 +1,25 @@
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf16(x: f16) -> f16 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf(x: f32) -> f32 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
     super::generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf128(x: f128) -> f128 {
     super::generic::round(x)
 }
diff --git a/libm/src/math/roundeven.rs b/libm/src/math/roundeven.rs
index 6e621d762..f0d67d410 100644
--- a/libm/src/math/roundeven.rs
+++ b/libm/src/math/roundeven.rs
@@ -3,21 +3,21 @@ use super::support::{Float, Round};
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf16(x: f16) -> f16 {
     roundeven_impl(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf(x: f32) -> f32 {
     roundeven_impl(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundeven(x: f64) -> f64 {
     roundeven_impl(x)
 }
@@ -25,7 +25,7 @@ pub fn roundeven(x: f64) -> f64 {
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf128(x: f128) -> f128 {
     roundeven_impl(x)
 }
diff --git a/libm/src/math/scalbn.rs b/libm/src/math/scalbn.rs
index ed73c3f94..f1a67cb7f 100644
--- a/libm/src/math/scalbn.rs
+++ b/libm/src/math/scalbn.rs
@@ -1,21 +1,21 @@
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf16(x: f16, n: i32) -> f16 {
     super::generic::scalbn(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf(x: f32, n: i32) -> f32 {
     super::generic::scalbn(x, n)
 }
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbn(x: f64, n: i32) -> f64 {
     super::generic::scalbn(x, n)
 }
 
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn scalbnf128(x: f128, n: i32) -> f128 {
     super::generic::scalbn(x, n)
 }
diff --git a/libm/src/math/sin.rs b/libm/src/math/sin.rs
index 229fa4bef..5378a7bc3 100644
--- a/libm/src/math/sin.rs
+++ b/libm/src/math/sin.rs
@@ -44,7 +44,7 @@ use super::{k_cos, k_sin, rem_pio2};
 /// The sine of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sin(x: f64) -> f64 {
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/sincos.rs b/libm/src/math/sincos.rs
index ebf482f2d..a364f7375 100644
--- a/libm/src/math/sincos.rs
+++ b/libm/src/math/sincos.rs
@@ -15,7 +15,7 @@ use super::{get_high_word, k_cos, k_sin, rem_pio2};
 /// Both the sine and cosine of `x` (f64).
 ///
 /// `x` is specified in radians and the return value is (sin(x), cos(x)).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sincos(x: f64) -> (f64, f64) {
     let s: f64;
     let c: f64;
diff --git a/libm/src/math/sincosf.rs b/libm/src/math/sincosf.rs
index f33607676..c4beb5267 100644
--- a/libm/src/math/sincosf.rs
+++ b/libm/src/math/sincosf.rs
@@ -26,7 +26,7 @@ const S4PIO2: f64 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */
 /// Both the sine and cosine of `x` (f32).
 ///
 /// `x` is specified in radians and the return value is (sin(x), cos(x)).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sincosf(x: f32) -> (f32, f32) {
     let s: f32;
     let c: f32;
diff --git a/libm/src/math/sinf.rs b/libm/src/math/sinf.rs
index 709b63fcf..b4edf6769 100644
--- a/libm/src/math/sinf.rs
+++ b/libm/src/math/sinf.rs
@@ -27,7 +27,7 @@ const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The sine of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/sinh.rs b/libm/src/math/sinh.rs
index 791841982..900dd6ca4 100644
--- a/libm/src/math/sinh.rs
+++ b/libm/src/math/sinh.rs
@@ -6,7 +6,7 @@ use super::{expm1, expo2};
 //
 
 /// The hyperbolic sine of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinh(x: f64) -> f64 {
     // union {double f; uint64_t i;} u = {.f = x};
     // uint32_t w;
diff --git a/libm/src/math/sinhf.rs b/libm/src/math/sinhf.rs
index 44d2e3560..501acea30 100644
--- a/libm/src/math/sinhf.rs
+++ b/libm/src/math/sinhf.rs
@@ -1,7 +1,7 @@
 use super::{expm1f, k_expo2f};
 
 /// The hyperbolic sine of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sinhf(x: f32) -> f32 {
     let mut h = 0.5f32;
     let mut ix = x.to_bits();
diff --git a/libm/src/math/sqrt.rs b/libm/src/math/sqrt.rs
index 76bc240cf..7ba1bc9b3 100644
--- a/libm/src/math/sqrt.rs
+++ b/libm/src/math/sqrt.rs
@@ -1,6 +1,6 @@
 /// The square root of `x` (f16).
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf16(x: f16) -> f16 {
     select_implementation! {
         name: sqrtf16,
@@ -12,7 +12,7 @@ pub fn sqrtf16(x: f16) -> f16 {
 }
 
 /// The square root of `x` (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
     select_implementation! {
         name: sqrtf,
@@ -28,7 +28,7 @@ pub fn sqrtf(x: f32) -> f32 {
 }
 
 /// The square root of `x` (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
     select_implementation! {
         name: sqrt,
@@ -45,7 +45,7 @@ pub fn sqrt(x: f64) -> f64 {
 
 /// The square root of `x` (f128).
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf128(x: f128) -> f128 {
     return super::generic::sqrt(x);
 }
diff --git a/libm/src/math/tan.rs b/libm/src/math/tan.rs
index a072bdec5..79c1bad56 100644
--- a/libm/src/math/tan.rs
+++ b/libm/src/math/tan.rs
@@ -43,7 +43,7 @@ use super::{k_tan, rem_pio2};
 /// The tangent of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tan(x: f64) -> f64 {
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
diff --git a/libm/src/math/tanf.rs b/libm/src/math/tanf.rs
index 8bcf9581f..a615573d8 100644
--- a/libm/src/math/tanf.rs
+++ b/libm/src/math/tanf.rs
@@ -27,7 +27,7 @@ const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
 /// The tangent of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanf(x: f32) -> f32 {
     let x64 = x as f64;
 
diff --git a/libm/src/math/tanh.rs b/libm/src/math/tanh.rs
index cc0abe4fc..c99cc2a70 100644
--- a/libm/src/math/tanh.rs
+++ b/libm/src/math/tanh.rs
@@ -8,7 +8,7 @@ use super::expm1;
 /// The hyperbolic tangent of `x` (f64).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanh(mut x: f64) -> f64 {
     let mut uf: f64 = x;
     let mut ui: u64 = f64::to_bits(uf);
diff --git a/libm/src/math/tanhf.rs b/libm/src/math/tanhf.rs
index fffbba6c6..3cbd5917f 100644
--- a/libm/src/math/tanhf.rs
+++ b/libm/src/math/tanhf.rs
@@ -3,7 +3,7 @@ use super::expm1f;
 /// The hyperbolic tangent of `x` (f32).
 ///
 /// `x` is specified in radians.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tanhf(mut x: f32) -> f32 {
     /* x = |x| */
     let mut ix = x.to_bits();
diff --git a/libm/src/math/tgamma.rs b/libm/src/math/tgamma.rs
index 305986064..41415d9d1 100644
--- a/libm/src/math/tgamma.rs
+++ b/libm/src/math/tgamma.rs
@@ -131,7 +131,7 @@ fn s(x: f64) -> f64 {
 }
 
 /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f64).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tgamma(mut x: f64) -> f64 {
     let u: u64 = x.to_bits();
     let absx: f64;
diff --git a/libm/src/math/tgammaf.rs b/libm/src/math/tgammaf.rs
index fe178f7a3..a63a2a318 100644
--- a/libm/src/math/tgammaf.rs
+++ b/libm/src/math/tgammaf.rs
@@ -1,7 +1,7 @@
 use super::tgamma;
 
 /// The [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) (f32).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn tgammaf(x: f32) -> f32 {
     tgamma(x as f64) as f32
 }
diff --git a/libm/src/math/trunc.rs b/libm/src/math/trunc.rs
index fa50d55e1..20d52a111 100644
--- a/libm/src/math/trunc.rs
+++ b/libm/src/math/trunc.rs
@@ -2,7 +2,7 @@
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf16(x: f16) -> f16 {
     super::generic::trunc(x)
 }
@@ -10,7 +10,7 @@ pub fn truncf16(x: f16) -> f16 {
 /// Rounds the number toward 0 to the closest integral value (f32).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf(x: f32) -> f32 {
     select_implementation! {
         name: truncf,
@@ -24,7 +24,7 @@ pub fn truncf(x: f32) -> f32 {
 /// Rounds the number toward 0 to the closest integral value (f64).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn trunc(x: f64) -> f64 {
     select_implementation! {
         name: trunc,
@@ -39,7 +39,7 @@ pub fn trunc(x: f64) -> f64 {
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg(f128_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf128(x: f128) -> f128 {
     super::generic::trunc(x)
 }

From 75ac1460d5c83f8563dc7d24128664be59b23ab6 Mon Sep 17 00:00:00 2001
From: Paul Murphy <murp@redhat.com>
Date: Mon, 4 Aug 2025 13:00:06 -0500
Subject: [PATCH 125/133] compiler-builtins: plumb LSE support for aarch64 on
 linux

Add dynamic support for aarch64 LSE atomic ops on linux targets
when optimized-compiler-builtins is not enabled.

A hook, __rust_enable_lse, is provided for the runtime to enable
them if available. A future patch will use this to enable them
if available.

The resulting asm should exactly match that of LLVM's compiler-rt
builtins, though the symbol naming for the support function and
global does not.
---
 compiler-builtins/src/aarch64_linux.rs | 76 ++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 10 deletions(-)

diff --git a/compiler-builtins/src/aarch64_linux.rs b/compiler-builtins/src/aarch64_linux.rs
index 38fcab152..01d7fb473 100644
--- a/compiler-builtins/src/aarch64_linux.rs
+++ b/compiler-builtins/src/aarch64_linux.rs
@@ -6,9 +6,6 @@
 //! which is supported on the current CPU.
 //! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
 //!
-//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
-//! Use the `compiler-rt` intrinsics if you want LSE support.
-//!
 //! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
 //!
 //! Generate functions for each of the following symbols:
@@ -24,7 +21,18 @@
 //! We do something similar, but with macro arguments.
 #![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
 
-// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
+use core::sync::atomic::{AtomicU8, Ordering};
+
+/// non-zero if the host supports LSE atomics.
+static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
+
+intrinsics! {
+    /// Call to enable LSE in outline atomic operations. The caller must verify
+    /// LSE operations are supported.
+    pub extern "C" fn __rust_enable_lse() {
+        HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
+    }
+}
 
 /// Translate a byte size to a Rust type.
 #[rustfmt::skip]
@@ -45,6 +53,7 @@ macro_rules! reg {
     (2, $num:literal) => { concat!("w", $num) };
     (4, $num:literal) => { concat!("w", $num) };
     (8, $num:literal) => { concat!("x", $num) };
+    (16, $num:literal) => { concat!("x", $num) };
 }
 
 /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction.
@@ -126,6 +135,41 @@ macro_rules! stxp {
     };
 }
 
+// If supported, perform the requested LSE op and return, or fallthrough.
+macro_rules! try_lse_op {
+    ($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => {
+        concat!(
+            ".arch_extension lse; ",
+            "adrp    x16, {have_lse}; ",
+            "ldrb    w16, [x16, :lo12:{have_lse}]; ",
+            "cbz     w16, 8f; ",
+            // LSE_OP  s(reg),* [$mem]
+            concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",),
+            "ret; ",
+            "8:"
+        )
+    };
+}
+
+// Translate memory ordering to the LSE suffix
+#[rustfmt::skip]
+macro_rules! lse_mem_sfx {
+    (Relaxed) => { "" };
+    (Acquire) => { "a" };
+    (Release) => { "l" };
+    (AcqRel) => { "al" };
+}
+
+// Generate the aarch64 LSE operation for memory ordering and width
+macro_rules! lse {
+    ($op:literal, $order:ident, 16) => {
+        concat!($op, "p", lse_mem_sfx!($order))
+    };
+    ($op:literal, $order:ident, $bytes:tt) => {
+        concat!($op, lse_mem_sfx!($order), size!($bytes))
+    };
+}
+
 /// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
 macro_rules! compare_and_swap {
     ($ordering:ident, $bytes:tt, $name:ident) => {
@@ -137,7 +181,9 @@ macro_rules! compare_and_swap {
             ) -> int_ty!($bytes) {
                 // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                 core::arch::naked_asm! {
-                    // UXT s(tmp0), s(0)
+                    // CAS    s(0), s(1), [x2]; if LSE supported.
+                    try_lse_op!("cas", $ordering, $bytes, 0, 1, [x2]),
+                    // UXT    s(tmp0), s(0)
                     concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
                     // LDXR   s(0), [x2]
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
                     "cbnz   w17, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 {
                 expected: i128, desired: i128, ptr: *mut i128
             ) -> i128 {
                 core::arch::naked_asm! {
+                    // CASP   x0, x1, x2, x3, [x4]; if LSE supported.
+                    try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]),
                     "mov    x16, x0",
                     "mov    x17, x1",
                     "0:",
@@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 {
                     "cbnz   w15, 0b",
                     "1:",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -195,6 +245,8 @@ macro_rules! swap {
                 left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    // SWP    s(0), s(0), [x1]; if LSE supported.
+                    try_lse_op!("swp", $ordering, $bytes, 0, 0, [x1]),
                     // mov    s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -204,6 +256,7 @@ macro_rules! swap {
                     concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                     "cbnz   w17, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -212,7 +265,7 @@ macro_rules! swap {
 
 /// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
 macro_rules! fetch_op {
-    ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
+    ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
         intrinsics! {
             #[maybe_use_optimized_c_shim]
             #[unsafe(naked)]
@@ -220,6 +273,8 @@ macro_rules! fetch_op {
                 val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
             ) -> int_ty!($bytes) {
                 core::arch::naked_asm! {
+                    // LSEOP  s(0), s(0), [x1]; if LSE supported.
+                    try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [x1]),
                     // mov    s(tmp0), s(0)
                     concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                     "0:",
@@ -231,6 +286,7 @@ macro_rules! fetch_op {
                     concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                     "cbnz  w15, 0b",
                     "ret",
+                    have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                 }
             }
         }
@@ -240,25 +296,25 @@ macro_rules! fetch_op {
 // We need a single macro to pass to `foreach_ldadd`.
 macro_rules! add {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "add" }
+        fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
     };
 }
 
 macro_rules! and {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "bic" }
+        fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
     };
 }
 
 macro_rules! xor {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "eor" }
+        fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
     };
 }
 
 macro_rules! or {
     ($ordering:ident, $bytes:tt, $name:ident) => {
-        fetch_op! { $ordering, $bytes, $name, "orr" }
+        fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
     };
 }
 

From 87a66ec9699e5ddf2c660277b8078099efd01311 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 5 Aug 2025 20:56:27 +0000
Subject: [PATCH 126/133] configure: Use `CARGO_CFG_*_{F16,F128}` rather than
 invoking rustc

Currently we run the `rustc` from the `RUSTC` environment variable to
figure out whether or not to enable `f16` and `f128`, based on the
`target_has_reliable_{f16,f128}` config. However, this does not know
about the codegen backend used, and the backend isn't trivial to check
in a build script (usually it gets set via `RUSTFLAGS`).

It turns out we don't actually need to run `rustc` here: Cargo
unconditionally emits all config from the relevant compiler as
`CARGO_CFG_*` variables, regardless of whether or not they are known
options. Switch to checking these for setting config rather than
invoking `rustc`.

As an added advantage, this will work with target.json files without any
special handling.

Fixes: ed17b95715dd ("Use the compiler to determine whether or not to enable `f16` and `f128`")
---
 compiler-builtins/configure.rs | 27 ++++-----------------------
 libm/configure.rs              | 30 ++++++------------------------
 2 files changed, 10 insertions(+), 47 deletions(-)

diff --git a/compiler-builtins/configure.rs b/compiler-builtins/configure.rs
index caedc034d..79e238abc 100644
--- a/compiler-builtins/configure.rs
+++ b/compiler-builtins/configure.rs
@@ -1,6 +1,5 @@
 // Configuration that is shared between `compiler_builtins` and `builtins_test`.
 
-use std::process::{Command, Stdio};
 use std::{env, str};
 
 #[derive(Debug)]
@@ -35,26 +34,6 @@ impl Target {
             .map(|s| s.to_lowercase().replace("_", "-"))
             .collect();
 
-        // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used
-        // to get consistent output regardless of channel (`f16`/`f128` config options are hidden
-        // on stable otherwise).
-        let mut cmd = Command::new(env::var("RUSTC").unwrap());
-        cmd.args(["--print=cfg", "--target", &triple])
-            .env("RUSTC_BOOTSTRAP", "1")
-            .stderr(Stdio::inherit());
-        let out = cmd
-            .output()
-            .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
-        let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
-
-        // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe
-        // choice and leave `f16` and `f128` disabled.
-        let rustc_output_ok = out.status.success();
-        let reliable_f128 =
-            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128");
-        let reliable_f16 =
-            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16");
-
         Self {
             triple,
             triple_split,
@@ -74,8 +53,10 @@ impl Target {
                 .split(",")
                 .map(ToOwned::to_owned)
                 .collect(),
-            reliable_f128,
-            reliable_f16,
+            // Note that these are unstable options, so only show up with the nightly compiler or
+            // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway).
+            reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(),
+            reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
         }
     }
 
diff --git a/libm/configure.rs b/libm/configure.rs
index f9100d2d5..76186e636 100644
--- a/libm/configure.rs
+++ b/libm/configure.rs
@@ -1,9 +1,9 @@
 // Configuration shared with both libm and libm-test
 
+use std::env;
 use std::path::PathBuf;
-use std::process::{Command, Stdio};
-use std::{env, str};
 
+#[derive(Debug)]
 #[allow(dead_code)]
 pub struct Config {
     pub manifest_dir: PathBuf,
@@ -33,26 +33,6 @@ impl Config {
             .map(|s| s.to_lowercase().replace("_", "-"))
             .collect();
 
-        // Query rustc for options that Cargo does not provide env for. The bootstrap hack is used
-        // to get consistent output regardless of channel (`f16`/`f128` config options are hidden
-        // on stable otherwise).
-        let mut cmd = Command::new(env::var("RUSTC").unwrap());
-        cmd.args(["--print=cfg", "--target", &target_triple])
-            .env("RUSTC_BOOTSTRAP", "1")
-            .stderr(Stdio::inherit());
-        let out = cmd
-            .output()
-            .unwrap_or_else(|e| panic!("failed to run `{cmd:?}`: {e}"));
-        let rustc_cfg = str::from_utf8(&out.stdout).unwrap();
-
-        // If we couldn't query `rustc` (e.g. a custom JSON target was used), make the safe
-        // choice and leave `f16` and `f128` disabled.
-        let rustc_output_ok = out.status.success();
-        let reliable_f128 =
-            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f128");
-        let reliable_f16 =
-            rustc_output_ok && rustc_cfg.lines().any(|l| l == "target_has_reliable_f16");
-
         Self {
             target_triple,
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
@@ -66,8 +46,10 @@ impl Config {
             target_string: env::var("TARGET").unwrap(),
             target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
             target_features,
-            reliable_f128,
-            reliable_f16,
+            // Note that these are unstable options, so only show up with the nightly compiler or
+            // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway).
+            reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(),
+            reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
         }
     }
 }

From 9caec5d5df877d79f89bee073c4a3eb2d979e7f6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 7 Aug 2025 01:05:01 -0500
Subject: [PATCH 127/133] symcheck: Store the section name in `SymInfo` if
 available

Currently `SymInfo` stores a `Section`, which is just an index:

    SymInfo {
        section: Section(
            SectionIndex(
                539,
            ),
        ),
        ...
    },

Look up and store the section name instead if possible, with a fallback
to the `Section` debug printing. This makes output more clear and will
allow us to filter by section name.
---
 crates/symbol-check/src/main.rs | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index 1312a7179..beb568a0f 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -9,7 +9,7 @@ use std::process::{Command, Stdio};
 
 use object::read::archive::{ArchiveFile, ArchiveMember};
 use object::{
-    File as ObjFile, Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection,
+    File as ObjFile, Object, ObjectSection, ObjectSymbol, Symbol, SymbolKind, SymbolScope,
 };
 use serde_json::Value;
 
@@ -154,7 +154,7 @@ struct SymInfo {
     name: String,
     kind: SymbolKind,
     scope: SymbolScope,
-    section: SymbolSection,
+    section: String,
     is_undefined: bool,
     is_global: bool,
     is_local: bool,
@@ -165,12 +165,22 @@ struct SymInfo {
 }
 
 impl SymInfo {
-    fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
+    fn new(sym: &Symbol, obj: &ObjFile, member: &ArchiveMember) -> Self {
+        // Include the section name if possible. Fall back to the `Section` debug impl if not.
+        let section = sym.section();
+        let section_name = sym
+            .section()
+            .index()
+            .and_then(|idx| obj.section_by_index(idx).ok())
+            .and_then(|sec| sec.name().ok())
+            .map(ToString::to_string)
+            .unwrap_or_else(|| format!("{section:?}"));
+
         Self {
             name: sym.name().expect("missing name").to_owned(),
             kind: sym.kind(),
             scope: sym.scope(),
-            section: sym.section(),
+            section: section_name,
             is_undefined: sym.is_undefined(),
             is_global: sym.is_global(),
             is_local: sym.is_local(),
@@ -192,13 +202,13 @@ fn verify_no_duplicates(archive: &Archive) {
     let mut dups = Vec::new();
     let mut found_any = false;
 
-    archive.for_each_symbol(|symbol, member| {
+    archive.for_each_symbol(|symbol, obj, member| {
         // Only check defined globals
         if !symbol.is_global() || symbol.is_undefined() {
             return;
         }
 
-        let sym = SymInfo::new(&symbol, member);
+        let sym = SymInfo::new(&symbol, obj, member);
 
         // x86-32 includes multiple copies of thunk symbols
         if sym.name.starts_with("__x86.get_pc_thunk") {
@@ -244,7 +254,7 @@ fn verify_core_symbols(archive: &Archive) {
     let mut undefined = Vec::new();
     let mut has_symbols = false;
 
-    archive.for_each_symbol(|symbol, member| {
+    archive.for_each_symbol(|symbol, obj, member| {
         has_symbols = true;
 
         // Find only symbols from `core`
@@ -252,7 +262,7 @@ fn verify_core_symbols(archive: &Archive) {
             return;
         }
 
-        let sym = SymInfo::new(&symbol, member);
+        let sym = SymInfo::new(&symbol, obj, member);
         if sym.is_undefined {
             undefined.push(sym);
         } else {
@@ -304,9 +314,9 @@ impl Archive {
     }
 
     /// For a given archive, do something with each symbol.
-    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ObjFile, &ArchiveMember)) {
         self.for_each_object(|obj, member| {
-            obj.symbols().for_each(|sym| f(sym, member));
+            obj.symbols().for_each(|sym| f(sym, &obj, member));
         });
     }
 }

From e74519e782fe077ca967652567ad45db2d39da8c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 7 Aug 2025 03:27:16 -0500
Subject: [PATCH 128/133] symcheck: Ignore symbols in `.debug_gdb_scripts`

Since [1], our object files may now contain a GDB script section. These
symbols wind up with multiple instances in the archive but are weak, so
we can safely ignore them in our duplicates check.

This resolves the current CI failures.

[1]: https://github.com/rust-lang/rust/pull/143679
---
 crates/symbol-check/src/main.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index beb568a0f..129c959f2 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -215,6 +215,11 @@ fn verify_no_duplicates(archive: &Archive) {
             return;
         }
 
+        // GDB pretty printing symbols may show up more than once but are weak.
+        if sym.section == ".debug_gdb_scripts" && sym.is_weak {
+            return;
+        }
+
         // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
         // relocations. These are allowed to have repeated definitions.
         let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];

From 6c8bf5aa57a132f439b3f5bce8d73b1d133540cb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 7 Aug 2025 03:42:02 -0500
Subject: [PATCH 129/133] Remove instances of `allow(improper_ctypes)`

i128/u128 haven't flagged `improper_ctypes` for a while, and this just
made it to stable [1]. Remove the `allow`s as they are no longer needed.

[1]: https://blog.rust-lang.org/2025/08/07/Rust-1.89.0/#i128-and-u128-in-extern-c-functions
---
 builtins-test/benches/float_conv.rs | 1 -
 compiler-builtins/src/lib.rs        | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/builtins-test/benches/float_conv.rs b/builtins-test/benches/float_conv.rs
index e0f488eb6..40c13d270 100644
--- a/builtins-test/benches/float_conv.rs
+++ b/builtins-test/benches/float_conv.rs
@@ -1,4 +1,3 @@
-#![allow(improper_ctypes)]
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::float_bench;
diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs
index ca75f44e0..b111dc0bd 100644
--- a/compiler-builtins/src/lib.rs
+++ b/compiler-builtins/src/lib.rs
@@ -18,10 +18,6 @@
 #![no_std]
 #![allow(unused_features)]
 #![allow(internal_features)]
-// We use `u128` in a whole bunch of places which we currently agree with the
-// compiler on ABIs and such, so we should be "good enough" for now and changes
-// to the `u128` ABI will be reflected here.
-#![allow(improper_ctypes, improper_ctypes_definitions)]
 // `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code.
 #![allow(clippy::manual_swap)]
 // Support compiling on both stage0 and stage1 which may differ in supported stable features.

From 610e2d2c6ff78eb05fb97153358b8d9782d586a4 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 7 Aug 2025 15:45:52 -0500
Subject: [PATCH 130/133] Start running tests for aarch64-pc-windows-msvc

This target is currently build-only. Switch to the windows-11-arm
runner, which allows us to start running tests.
---
 .github/workflows/main.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index c54df2e90..3afadbfe8 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -51,8 +51,7 @@ jobs:
         - target: aarch64-unknown-linux-gnu
           os: ubuntu-24.04-arm
         - target: aarch64-pc-windows-msvc
-          os: windows-2025
-          build_only: 1
+          os: windows-11-arm
         - target: arm-unknown-linux-gnueabi
           os: ubuntu-24.04
         - target: arm-unknown-linux-gnueabihf

From 98d15801874d64ac2afa9bed9c9b6b79c484055c Mon Sep 17 00:00:00 2001
From: The rustc-josh-sync Cronjob Bot <github-actions@github.com>
Date: Sat, 9 Aug 2025 01:53:44 +0000
Subject: [PATCH 131/133] Prepare for merging from rust-lang/rust

This updates the rust-version file to ffb9d94dcf4ade0d534842be3672d5e9f47e1333.
---
 rust-version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-version b/rust-version
index a4db05a87..3928504c8 100644
--- a/rust-version
+++ b/rust-version
@@ -1 +1 @@
-82310651b93a594a3fd69015e1562186a080d94c
+ffb9d94dcf4ade0d534842be3672d5e9f47e1333

From c944376dfcf14293b697aad44439951a62d3891d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 9 Aug 2025 15:42:48 -0500
Subject: [PATCH 132/133] symcheck: Skip `__ymm@` symbols on Windows

Like `__real@` and `__xmm@`, Windows can emit duplicate `__ymm@`
symbols for constants.
---
 crates/symbol-check/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs
index 129c959f2..4e9455233 100644
--- a/crates/symbol-check/src/main.rs
+++ b/crates/symbol-check/src/main.rs
@@ -222,7 +222,7 @@ fn verify_no_duplicates(archive: &Archive) {
 
         // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
         // relocations. These are allowed to have repeated definitions.
-        let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
+        let win_allowed_dup_pfx = ["__real@", "__xmm@", "__ymm@", "??_C@_", ".refptr"];
         if win_allowed_dup_pfx
             .iter()
             .any(|pfx| sym.name.starts_with(pfx))

From 9c176c24e8b6295e2ba1c35d9713ef9e2d0055fb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 9 Aug 2025 06:29:01 -0500
Subject: [PATCH 133/133] Add __addhf3, __subhf3, __mulhf3,
 __{eq,ge,gt,le,lt,ne,unord}hf2

LLVM does not currently emit these, but it is being discussed as an
option on platforms where `f32` is not hardware supported. Glibc/libgcc
also has the comparison functions [1] already.

The generic implementations for addition, subtraction, and
multiplication work for f16 without any complications, as do
comparisons, so add them here.

[1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=6ec6c77867af4ddfec7323e0ac6ede89effca852
---
 builtins-test/tests/addsub.rs      | 24 +++++++++++------------
 builtins-test/tests/cmp.rs         | 21 ++++++++++++++++++++
 builtins-test/tests/mul.rs         |  8 +++++++-
 compiler-builtins/src/float/add.rs |  5 +++++
 compiler-builtins/src/float/cmp.rs | 31 ++++++++++++++++++++++++++++++
 compiler-builtins/src/float/mul.rs |  5 +++++
 compiler-builtins/src/float/sub.rs |  5 +++++
 7 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/builtins-test/tests/addsub.rs b/builtins-test/tests/addsub.rs
index abe7dde64..f3334bd0e 100644
--- a/builtins-test/tests/addsub.rs
+++ b/builtins-test/tests/addsub.rs
@@ -1,4 +1,5 @@
 #![allow(unused_macros)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::*;
@@ -115,28 +116,25 @@ macro_rules! float_sum {
 mod float_addsub {
     use super::*;
 
+    #[cfg(f16_enabled)]
+    float_sum! {
+        f16, __addhf3, __subhf3, Half, all();
+    }
+
     float_sum! {
         f32, __addsf3, __subsf3, Single, all();
         f64, __adddf3, __subdf3, Double, all();
     }
-}
-
-#[cfg(f128_enabled)]
-#[cfg(not(x86_no_sse))]
-#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
-mod float_addsub_f128 {
-    use super::*;
 
+    #[cfg(f128_enabled)]
+    #[cfg(not(x86_no_sse))]
+    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     float_sum! {
         f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
     }
-}
-
-#[cfg(f128_enabled)]
-#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
-mod float_addsub_f128_ppc {
-    use super::*;
 
+    #[cfg(f128_enabled)]
+    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float_sum! {
         f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
     }
diff --git a/builtins-test/tests/cmp.rs b/builtins-test/tests/cmp.rs
index a904dc5f7..4b01b6ca1 100644
--- a/builtins-test/tests/cmp.rs
+++ b/builtins-test/tests/cmp.rs
@@ -1,5 +1,6 @@
 #![allow(unused_macros)]
 #![allow(unreachable_code)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
 
 use builtins_test::*;
@@ -51,6 +52,26 @@ mod float_comparisons {
         };
     }
 
+    #[test]
+    #[cfg(f16_enabled)]
+    fn cmp_f16() {
+        use compiler_builtins::float::cmp::{
+            __eqhf2, __gehf2, __gthf2, __lehf2, __lthf2, __nehf2, __unordhf2,
+        };
+
+        fuzz_float_2(N, |x: f16, y: f16| {
+            assert_eq!(__unordhf2(x, y) != 0, x.is_nan() || y.is_nan());
+            cmp!(f16, x, y, Half, all(),
+                1, __lthf2;
+                1, __lehf2;
+                1, __eqhf2;
+                -1, __gehf2;
+                -1, __gthf2;
+                1, __nehf2;
+            );
+        });
+    }
+
     #[test]
     fn cmp_f32() {
         use compiler_builtins::float::cmp::{
diff --git a/builtins-test/tests/mul.rs b/builtins-test/tests/mul.rs
index 3072b45dc..bbf1157db 100644
--- a/builtins-test/tests/mul.rs
+++ b/builtins-test/tests/mul.rs
@@ -1,5 +1,6 @@
-#![allow(unused_macros)]
+#![cfg_attr(f16_enabled, feature(f16))]
 #![cfg_attr(f128_enabled, feature(f128))]
+#![allow(unused_macros)]
 
 use builtins_test::*;
 
@@ -117,6 +118,11 @@ macro_rules! float_mul {
 mod float_mul {
     use super::*;
 
+    #[cfg(f16_enabled)]
+    float_mul! {
+        f16, __mulhf3, Half, all();
+    }
+
     // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
     // nightly.
     float_mul! {
diff --git a/compiler-builtins/src/float/add.rs b/compiler-builtins/src/float/add.rs
index 0cc362f70..8dbfb0e10 100644
--- a/compiler-builtins/src/float/add.rs
+++ b/compiler-builtins/src/float/add.rs
@@ -191,6 +191,11 @@ where
 }
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __addhf3(a: f16, b: f16) -> f16 {
+        add(a, b)
+    }
+
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_fadd]
     pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {
diff --git a/compiler-builtins/src/float/cmp.rs b/compiler-builtins/src/float/cmp.rs
index f1e54dc1c..8ab39c2b5 100644
--- a/compiler-builtins/src/float/cmp.rs
+++ b/compiler-builtins/src/float/cmp.rs
@@ -115,6 +115,37 @@ fn unord<F: Float>(a: F, b: F) -> bool {
     a_abs > inf_rep || b_abs > inf_rep
 }
 
+#[cfg(f16_enabled)]
+intrinsics! {
+    pub extern "C" fn __lehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __gehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_ge_abi()
+    }
+
+    pub extern "C" fn __unordhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        unord(a, b) as crate::float::cmp::CmpResult
+    }
+
+    pub extern "C" fn __eqhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __lthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __nehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_le_abi()
+    }
+
+    pub extern "C" fn __gthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
+        cmp(a, b).to_ge_abi()
+    }
+}
+
 intrinsics! {
     pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
         cmp(a, b).to_le_abi()
diff --git a/compiler-builtins/src/float/mul.rs b/compiler-builtins/src/float/mul.rs
index dbed3095c..49a2414eb 100644
--- a/compiler-builtins/src/float/mul.rs
+++ b/compiler-builtins/src/float/mul.rs
@@ -180,6 +180,11 @@ where
 }
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __mulhf3(a: f16, b: f16) -> f16 {
+        mul(a, b)
+    }
+
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_fmul]
     pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
diff --git a/compiler-builtins/src/float/sub.rs b/compiler-builtins/src/float/sub.rs
index a0fd9dff9..48ef33b0b 100644
--- a/compiler-builtins/src/float/sub.rs
+++ b/compiler-builtins/src/float/sub.rs
@@ -1,6 +1,11 @@
 use crate::float::Float;
 
 intrinsics! {
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __subhf3(a: f16, b: f16) -> f16 {
+        crate::float::add::__addhf3(a, f16::from_bits(b.to_bits() ^ f16::SIGN_MASK))
+    }
+
     #[arm_aeabi_alias = __aeabi_fsub]
     pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 {
         crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))